summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Florin Coras <fcoras@cisco.com> 2016-08-08 16:04:26 +0200
committer Dave Barach <openvpp@barachs.net> 2016-08-11 14:04:11 +0000
commit 3590ac5881261c95a3c575360e24903d60fac392 (patch)
tree 430cb5d803cf7a22c0cc7faf8ee11a1d978d6e72
parent 77ae107ad647f25d22471fabdcf4b31097b1789c (diff)
VPP-196 LISP L2/L3 tunnel multihoming
Change-Id: If96d9ff23a7aacdb684494f854d4029f55837065 Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r-- vnet/vnet/lisp-cp/control.c 54
-rw-r--r-- vnet/vnet/lisp-cp/gid_dictionary.c 2
-rw-r--r-- vnet/vnet/lisp-cp/lisp_types.h 8
-rw-r--r-- vnet/vnet/lisp-gpe/interface.c 137
-rw-r--r-- vnet/vnet/lisp-gpe/ip_forward.c 53
-rw-r--r-- vnet/vnet/lisp-gpe/lisp_gpe.c 230
-rw-r--r-- vnet/vnet/lisp-gpe/lisp_gpe.h 36
7 files changed, 407 insertions, 113 deletions
diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c
index 4424b60199a..c28276562c8 100644
--- a/vnet/vnet/lisp-cp/control.c
+++ b/vnet/vnet/lisp-cp/control.c
@@ -239,13 +239,13 @@ dp_del_fwd_entry (lisp_cp_main_t * lcm, u32 src_map_index, u32 dst_map_index)
*
*/
static u32
-get_locator_pair (lisp_cp_main_t* lcm, mapping_t * lcl_map, mapping_t * rmt_map,
- locator_pair_t ** locator_pairs)
+get_locator_pairs (lisp_cp_main_t* lcm, mapping_t * lcl_map,
+ mapping_t * rmt_map, locator_pair_t ** locator_pairs)
{
- u32 i, minp = ~0, limitp = 0, li, check_index = 0, done = 0, esi;
+ u32 i, limitp = 0, li, found = 0, esi;
locator_set_t * rmt_ls, * lcl_ls;
- ip_address_t _lcl, * lcl = &_lcl;
- locator_t * l, * rmt = 0;
+ ip_address_t _lcl_addr, * lcl_addr = &_lcl_addr;
+ locator_t * lp, * rmt = 0;
uword * checked = 0;
locator_pair_t pair;
@@ -255,7 +255,7 @@ get_locator_pair (lisp_cp_main_t* lcm, mapping_t * lcl_map, mapping_t * rmt_map,
if (!rmt_ls || vec_len(rmt_ls->locator_indices) == 0)
return 0;
- while (!done)
+ while (1)
{
rmt = 0;
@@ -266,22 +266,28 @@ get_locator_pair (lisp_cp_main_t* lcm, mapping_t * lcl_map, mapping_t * rmt_map,
continue;
li = vec_elt(rmt_ls->locator_indices, i);
- l = pool_elt_at_index(lcm->locator_pool, li);
+ lp = pool_elt_at_index(lcm->locator_pool, li);
/* we don't support non-IP locators for now */
- if (gid_address_type(&l->address) != GID_ADDR_IP_PREFIX)
+ if (gid_address_type(&lp->address) != GID_ADDR_IP_PREFIX)
continue;
- if (l->priority < minp && l->priority >= limitp)
+ if ((found && lp->priority == limitp)
+ || (!found && lp->priority >= limitp))
{
- minp = l->priority;
- rmt = l;
- check_index = i;
+ rmt = lp;
+
+ /* don't search for locators with lower priority and don't
+ * check this locator again*/
+ limitp = lp->priority;
+ hash_set(checked, i, 1);
+ break;
}
}
/* check if a local locator with a route to remote locator exists */
if (rmt != 0)
{
+ /* find egress sw_if_index for rmt locator */
esi = ip_fib_get_egress_iface_for_dst (
lcm, &gid_address_ip(&rmt->address));
if ((u32) ~0 == esi)
@@ -292,31 +298,31 @@ get_locator_pair (lisp_cp_main_t* lcm, mapping_t * lcl_map, mapping_t * rmt_map,
li = vec_elt (lcl_ls->locator_indices, i);
locator_t * sl = pool_elt_at_index (lcm->locator_pool, li);
- /* found local locator */
+ /* found local locator with the needed sw_if_index*/
if (sl->sw_if_index == esi)
{
+ /* and it has an address */
if (0 == ip_interface_get_first_ip_address (lcm,
sl->sw_if_index,
- gid_address_ip_version(&rmt->address), lcl))
+ gid_address_ip_version(&rmt->address), lcl_addr))
continue;
memset(&pair, 0, sizeof(pair));
- ip_address_copy(&pair.rmt_loc, &gid_address_ip(&rmt->address));
- ip_address_copy(&pair.lcl_loc, lcl);
+ ip_address_copy (&pair.rmt_loc,
+ &gid_address_ip(&rmt->address));
+ ip_address_copy(&pair.lcl_loc, lcl_addr);
+ pair.weight = rmt->weight;
vec_add1(locator_pairs[0], pair);
- done = 2;
+ found = 1;
}
}
-
- /* skip this remote locator in next searches */
- limitp = minp;
- hash_set(checked, check_index, 1);
}
else
- done = 1;
+ break;
}
+
hash_free(checked);
- return (done == 2) ? 1 : 0;
+ return found;
}
static void
@@ -369,7 +375,7 @@ dp_add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index)
}
/* find best locator pair that 1) verifies LISP policy 2) are connected */
- if (0 == get_locator_pair (lcm, src_map, dst_map, &a->locator_pairs))
+ if (0 == get_locator_pairs (lcm, src_map, dst_map, &a->locator_pairs))
{
/* negative entry */
a->is_negative = 1;
diff --git a/vnet/vnet/lisp-cp/gid_dictionary.c b/vnet/vnet/lisp-cp/gid_dictionary.c
index a6699b99f16..d4875f25d41 100644
--- a/vnet/vnet/lisp-cp/gid_dictionary.c
+++ b/vnet/vnet/lisp-cp/gid_dictionary.c
@@ -21,7 +21,7 @@ mac_lookup (gid_dictionary_t * db, u32 vni, u8 * key)
int rv;
BVT(clib_bihash_kv) kv, value;
- kv.key[0] = ((u64 *)key)[0] & MAC_BIT_MASK;
+ kv.key[0] = mac_to_u64(key);
kv.key[1] = (u64)vni;
kv.key[2] = 0;
diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h
index 2587fce526b..d0ca6e0bf5d 100644
--- a/vnet/vnet/lisp-cp/lisp_types.h
+++ b/vnet/vnet/lisp-cp/lisp_types.h
@@ -231,7 +231,11 @@ void _n ## _copy (void * dst , void * src);
foreach_gid_address_type_fcns
#undef _
-#define MAC_BIT_MASK (((u64)1 << 48) - 1)
+/**
+ * Pack a 6-byte MAC address into the low 48 bits of a u64.
+ *
+ * The value is assembled byte by byte, with m[0] in the least significant
+ * byte, so that exactly six bytes are read, m needs no particular alignment
+ * and the result does not depend on host byte order. On little-endian hosts
+ * this is the same value a masked 8-byte load of m would produce, without
+ * touching the two bytes that follow the address.
+ *
+ * @param m pointer to the 6 bytes of the MAC address
+ * @return the MAC address in bits 0..47, bits 48..63 cleared
+ */
+always_inline u64
+mac_to_u64(u8 *m)
+{
+  return ((u64) m[0])
+    | ((u64) m[1] << 8)
+    | ((u64) m[2] << 16)
+    | ((u64) m[3] << 24)
+    | ((u64) m[4] << 32)
+    | ((u64) m[5] << 40);
+}
typedef struct
{
@@ -286,7 +290,7 @@ typedef struct locator_pair
ip_address_t lcl_loc;
ip_address_t rmt_loc;
- u8 priority;
+ u8 priority; /* TODO remove */
u8 weight;
} locator_pair_t;
diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c
index 77ad9a33171..c7bba47b1c2 100644
--- a/vnet/vnet/lisp-gpe/interface.c
+++ b/vnet/vnet/lisp-gpe/interface.c
@@ -73,19 +73,25 @@ get_one_tunnel_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
always_inline void
encap_one_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
- lisp_gpe_tunnel_t * t0, u32 * next0, u8 is_v4)
+ lisp_gpe_tunnel_t * t0, u32 * next0)
{
ASSERT(sizeof(ip4_udp_lisp_gpe_header_t) == 36);
ASSERT(sizeof(ip6_udp_lisp_gpe_header_t) == 56);
- if (is_v4)
+ lisp_gpe_sub_tunnel_t * st0;
+ u32 * sti0;
+
+ sti0 = vec_elt_at_index(t0->sub_tunnels_lbv,
+ vnet_buffer(b0)->ip.flow_hash % t0->sub_tunnels_lbv_count);
+ st0 = vec_elt_at_index(t0->sub_tunnels, sti0[0]);
+ if (st0->is_ip4)
{
- ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 36, 1);
+ ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
}
else
{
- ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 56, 0);
+ ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0);
next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
}
@@ -129,22 +135,51 @@ get_two_tunnels_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0,
always_inline void
encap_two_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1,
lisp_gpe_tunnel_t * t0, lisp_gpe_tunnel_t * t1, u32 * next0,
- u32 * next1, u8 is_v4)
+ u32 * next1)
{
ASSERT(sizeof(ip4_udp_lisp_gpe_header_t) == 36);
ASSERT(sizeof(ip6_udp_lisp_gpe_header_t) == 56);
- if (is_v4)
+ lisp_gpe_sub_tunnel_t * st0, * st1;
+ u32 * sti0, * sti1;
+ sti0 = vec_elt_at_index(t0->sub_tunnels_lbv,
+ vnet_buffer(b0)->ip.flow_hash % t0->sub_tunnels_lbv_count);
+ sti1 = vec_elt_at_index(t1->sub_tunnels_lbv,
+ vnet_buffer(b1)->ip.flow_hash % t1->sub_tunnels_lbv_count);
+ st0 = vec_elt_at_index(t0->sub_tunnels, sti0[0]);
+ st1 = vec_elt_at_index(t1->sub_tunnels, sti1[0]);
+
+ if (PREDICT_TRUE(st0->is_ip4 == st1->is_ip4))
{
- ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 36, 1);
- ip_udp_encap_one (lgm->vlib_main, b1, t1->rewrite, 36, 1);
- next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
+ if (st0->is_ip4)
+ {
+ ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
+ ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1);
+ next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
+ }
+ else
+ {
+ ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0);
+ ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 0);
+ next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
+ }
}
else
{
- ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 56, 0);
- ip_udp_encap_one (lgm->vlib_main, b1, t1->rewrite, 56, 0);
- next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
+      /* The two sub-tunnels use different address families: each buffer
+       * must be encapsulated with the header length AND the is_ip4 flag
+       * of its own sub-tunnel (36/1 for IPv4, 56/0 for IPv6). */
+      if (st0->is_ip4)
+        {
+          /* b0 over IPv4, b1 over IPv6 */
+          ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1);
+          ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 0);
+          next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
+          next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
+        }
+      else
+        {
+          /* b0 over IPv6, b1 over IPv4 */
+          ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0);
+          ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1);
+          next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP;
+          next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP;
+        }
}
/* Reset to look up tunnel partner in the configured FIB */
@@ -223,19 +258,7 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
get_one_tunnel_inline (lgm, b1, &t1, is_v4_eid1 ? 1 : 0);
}
- if (PREDICT_TRUE(
- ip_addr_version(&t0->dst) == ip_addr_version(&t1->dst)))
- {
- encap_two_inline (lgm, b0, b1, t0, t1, &next0, &next1,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
- }
- else
- {
- encap_one_inline (lgm, b0, t0, &next0,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
- encap_one_inline (lgm, b1, t1, &next1,
- ip_addr_version(&t1->dst) == IP4 ? 1 : 0);
- }
+ encap_two_inline (lgm, b0, b1, t0, t1, &next0, &next1);
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -274,8 +297,7 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
is_v4_0 = is_v4_packet(vlib_buffer_get_current (b0));
get_one_tunnel_inline (lgm, b0, &t0, is_v4_0 ? 1 : 0);
- encap_one_inline (lgm, b0, t0, &next0,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
+ encap_one_inline (lgm, b0, t0, &next0);
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -517,6 +539,44 @@ l2_process_tunnel_action (vlib_buffer_t * b0, u8 action, u32 * next0)
}
}
+/* Flow hash of an IP packet whose header starts at data. The version
+ * nibble of the first byte selects the IPv4 hash; any other value is
+ * hashed as IPv6. Both use IP_FLOW_HASH_DEFAULT. */
+always_inline u32
+ip_flow_hash (void * data)
+{
+  ip4_header_t * ip4h = (ip4_header_t *) data;
+  u8 version_nibble = ip4h->ip_version_and_header_length & 0xF0;
+
+  if (version_nibble != 0x40)
+    return ip6_compute_flow_hash ((ip6_header_t *) ip4h, IP_FLOW_HASH_DEFAULT);
+
+  return ip4_compute_flow_hash (ip4h, IP_FLOW_HASH_DEFAULT);
+}
+
+/* Flow hash of the L2 frame at the current position of b0.
+ * Mixes three inputs with hash_mix64 and returns the low 32 bits of the
+ * result:
+ *  - the IP flow hash of the payload when the ethertype is IPv4 or IPv6,
+ *    otherwise the raw (network order) ethertype field
+ *  - the destination MAC
+ *  - the source MAC */
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+  ethernet_header_t * eth = vlib_buffer_get_current (b0);
+  u16 ether_type = clib_net_to_host_u16 (eth->type);
+  uword l2_hdr_len = ethernet_buffer_header_size (b0);
+  u64 a, b, c;
+
+  if (ether_type == ETHERNET_TYPE_IP4 || ether_type == ETHERNET_TYPE_IP6)
+    /* payload starts right after the ethernet header */
+    a = ip_flow_hash ((u8 *) eth + l2_hdr_len);
+  else
+    a = eth->type;
+
+  b = mac_to_u64 ((u8 *) eth->dst_address);
+  c = mac_to_u64 ((u8 *) eth->src_address);
+
+  /* hash_mix64 updates a, b and c in place */
+  hash_mix64 (a, b, c);
+
+  return (u32) c;
+}
+
always_inline void
l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, u32 * next0)
{
@@ -527,8 +587,10 @@ l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, u32 * next0)
if (PREDICT_TRUE(LISP_NO_ACTION == t0->action))
{
- encap_one_inline (lgm, b0, t0, next0,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
+ /* compute 'flow' hash */
+ if (PREDICT_TRUE(t0->sub_tunnels_lbv_count > 1))
+ vnet_buffer(b0)->ip.flow_hash = l2_flow_hash (b0);
+ encap_one_inline (lgm, b0, t0, next0);
}
else
{
@@ -550,21 +612,26 @@ l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1,
if (PREDICT_TRUE(LISP_NO_ACTION == t0->action
&& LISP_NO_ACTION == t1->action))
{
- encap_two_inline (lgm, b0, b1, t0, t1, next0, next1,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
+ if (PREDICT_TRUE(t0->sub_tunnels_lbv_count > 1))
+ vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
+ if (PREDICT_TRUE(t1->sub_tunnels_lbv_count > 1))
+ vnet_buffer(b1)->ip.flow_hash = l2_flow_hash(b1);
+ encap_two_inline (lgm, b0, b1, t0, t1, next0, next1);
}
else
{
if (LISP_NO_ACTION == t0->action)
{
- encap_one_inline (lgm, b0, t0, next0,
- ip_addr_version(&t0->dst) == IP4 ? 1 : 0);
+ if (PREDICT_TRUE(t0->sub_tunnels_lbv_count > 1))
+ vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
+ encap_one_inline (lgm, b0, t0, next0);
l2_process_tunnel_action (b1, t1->action, next1);
}
else if (LISP_NO_ACTION == t1->action)
{
- encap_one_inline (lgm, b1, t1, next1,
- ip_addr_version(&t1->dst) == IP4 ? 1 : 0);
+ if (PREDICT_TRUE(t1->sub_tunnels_lbv_count > 1))
+ vnet_buffer(b1)->ip.flow_hash = l2_flow_hash(b1);
+ encap_one_inline (lgm, b1, t1, next1);
l2_process_tunnel_action (b0, t0->action, next0);
}
else
diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c
index 607687305c5..47f3f7b3755 100644
--- a/vnet/vnet/lisp-gpe/ip_forward.c
+++ b/vnet/vnet/lisp-gpe/ip_forward.c
@@ -832,8 +832,8 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 bi0, bi1;
vlib_buffer_t * b0, * b1;
ip4_header_t * ip0, * ip1;
- u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1,
- src_adj_index1, src_fib_index1;
+ u32 dst_adj_index0, src_adj_index0, src_fib_index0;
+ u32 dst_adj_index1, src_adj_index1, src_fib_index1;
ip_adjacency_t * dst_adj0, * src_adj0, * dst_adj1, * src_adj1;
u32 next0, next1;
@@ -900,6 +900,17 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
src_adj0->rewrite_header.sw_if_index;
vnet_buffer (b1)->sw_if_index[VLIB_TX] =
src_adj1->rewrite_header.sw_if_index;
+
+ /* if multipath: saved_lookup_next_index is reused to store
+ * nb of sub-tunnels. If greater than 1, multipath is on.
+ * Note that flow hash should be 0 after ipx lookup! */
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip4_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
+
+ if (PREDICT_TRUE(src_adj1->saved_lookup_next_index > 1))
+ vnet_buffer (b1)->ip.flow_hash = ip4_compute_flow_hash (
+ ip1, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -910,6 +921,10 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
next0 = src_adj0->explicit_fib_index;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
src_adj0->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip4_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -923,6 +938,9 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
next1 = src_adj1->explicit_fib_index;
vnet_buffer (b1)->sw_if_index[VLIB_TX] =
src_adj1->rewrite_header.sw_if_index;
+ if (PREDICT_TRUE(src_adj1->saved_lookup_next_index > 1))
+ vnet_buffer (b1)->ip.flow_hash = ip4_compute_flow_hash (
+ ip1, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -978,6 +996,12 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
/* prepare packet for lisp-gpe output node */
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
src_adj0->rewrite_header.sw_if_index;
+
+ /* if multipath: saved_lookup_next_index is reused to store
+ * nb of sub-tunnels. If greater than 1, multipath is on */
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip4_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -1163,6 +1187,17 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
src_adj0->rewrite_header.sw_if_index;
vnet_buffer (b1)->sw_if_index[VLIB_TX] =
src_adj1->rewrite_header.sw_if_index;
+
+ /* if multipath: saved_lookup_next_index is reused to store
+ * nb of sub-tunnels. If greater than 1, multipath is on.
+ * Note that flow hash should be 0 after ipx lookup! */
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip6_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
+
+ if (PREDICT_TRUE(src_adj1->saved_lookup_next_index > 1))
+ vnet_buffer (b1)->ip.flow_hash = ip6_compute_flow_hash (
+ ip1, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -1173,6 +1208,10 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
next0 = src_adj0->explicit_fib_index;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
src_adj0->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip6_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -1186,6 +1225,10 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
next1 = src_adj1->explicit_fib_index;
vnet_buffer (b1)->sw_if_index[VLIB_TX] =
src_adj1->rewrite_header.sw_if_index;
+
+ if (PREDICT_TRUE(src_adj1->saved_lookup_next_index > 1))
+ vnet_buffer (b1)->ip.flow_hash = ip6_compute_flow_hash (
+ ip1, IP_FLOW_HASH_DEFAULT);
}
else
{
@@ -1241,6 +1284,12 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node,
/* prepare packet for lisp-gpe output node */
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
src_adj0->rewrite_header.sw_if_index;
+
+ /* if multipath: saved_lookup_next_index is reused to store
+ * nb of sub-tunnels. If greater than 1, multipath is on */
+ if (PREDICT_TRUE(src_adj0->saved_lookup_next_index > 1))
+ vnet_buffer (b0)->ip.flow_hash = ip6_compute_flow_hash (
+ ip0, IP_FLOW_HASH_DEFAULT);
}
else
{
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c
index e5d3500fb06..922788538a3 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe.c
+++ b/vnet/vnet/lisp-gpe/lisp_gpe.c
@@ -14,17 +14,19 @@
*/
#include <vnet/lisp-gpe/lisp_gpe.h>
+#include <vppinfra/math.h>
lisp_gpe_main_t lisp_gpe_main;
static int
-lisp_gpe_rewrite (lisp_gpe_tunnel_t * t)
+lisp_gpe_rewrite (lisp_gpe_tunnel_t * t, lisp_gpe_sub_tunnel_t * st,
+ locator_pair_t * lp)
{
u8 *rw = 0;
lisp_gpe_header_t * lisp0;
int len;
- if (ip_addr_version(&t->src) == IP4)
+ if (ip_addr_version(&lp->lcl_loc) == IP4)
{
ip4_header_t * ip0;
ip4_udp_lisp_gpe_header_t * h0;
@@ -41,8 +43,8 @@ lisp_gpe_rewrite (lisp_gpe_tunnel_t * t)
ip0->protocol = IP_PROTOCOL_UDP;
/* we fix up the ip4 header length and checksum after-the-fact */
- ip_address_copy_addr(&ip0->src_address, &t->src);
- ip_address_copy_addr(&ip0->dst_address, &t->dst);
+ ip_address_copy_addr(&ip0->src_address, &lp->lcl_loc);
+ ip_address_copy_addr(&ip0->dst_address, &lp->rmt_loc);
ip0->checksum = ip4_header_checksum (ip0);
/* UDP header, randomize src port on something, maybe? */
@@ -70,8 +72,8 @@ lisp_gpe_rewrite (lisp_gpe_tunnel_t * t)
ip0->protocol = IP_PROTOCOL_UDP;
/* we fix up the ip6 header length after-the-fact */
- ip_address_copy_addr(&ip0->src_address, &t->src);
- ip_address_copy_addr(&ip0->dst_address, &t->dst);
+ ip_address_copy_addr(&ip0->src_address, &lp->lcl_loc);
+ ip_address_copy_addr(&ip0->dst_address, &lp->rmt_loc);
/* UDP header, randomize src port on something, maybe? */
h0->udp.src_port = clib_host_to_net_u16 (4341);
@@ -87,10 +89,133 @@ lisp_gpe_rewrite (lisp_gpe_tunnel_t * t)
lisp0->next_protocol = t->next_protocol;
lisp0->iid = clib_host_to_net_u32 (t->vni);
- t->rewrite = rw;
+ st->is_ip4 = ip_addr_version(&lp->lcl_loc) == IP4;
+ st->rewrite = rw;
return 0;
}
+/* Sort comparator for normalized sub-tunnel weights: entries with a larger
+ * weight come first; entries of equal weight are ordered by the difference
+ * of their sub-tunnel indices. */
+static int
+weight_cmp (normalized_sub_tunnel_weights_t *a,
+            normalized_sub_tunnel_weights_t *b)
+{
+  if (a->weight > b->weight)
+    return -1;
+
+  if (a->weight < b->weight)
+    return 1;
+
+  return a->sub_tunnel_index - b->sub_tunnel_index;
+}
+
+/** Computes sub tunnel load balancing vector.
+ * Algorithm is identical to that used for building unequal-cost multipath
+ * adjacencies.
+ *
+ * The sub-tunnel weights of t are normalized to a power of 2 sized block of
+ * slots such that the share of slots of every sub-tunnel is within the
+ * accepted tolerance of its configured share. On return:
+ *  - t->sub_tunnels_lbv holds one sub-tunnel index per slot
+ *  - t->sub_tunnels_lbv_count is the number of slots
+ *  - t->norm_sub_tunnel_weights holds the per sub-tunnel slot counts,
+ *    sorted by decreasing weight
+ *
+ * If all weights are zero the sub-tunnels share the slots equally. If t has
+ * no sub-tunnels all three fields are cleared.
+ *
+ * @param t tunnel whose sub_tunnels vector has already been built
+ */
+static void
+compute_sub_tunnels_balancing_vector (lisp_gpe_tunnel_t * t)
+{
+  uword n_sts, i, n_nsts, n_nsts_left;
+  f64 sum_weight, norm, error, tolerance;
+  normalized_sub_tunnel_weights_t * nsts = 0, * stp;
+  lisp_gpe_sub_tunnel_t * sts = t->sub_tunnels;
+  u32 * st_lbv = 0;
+
+  /* Accept 1% error */
+  tolerance = .01;
+
+  n_sts = vec_len (sts);
+
+  /* Nothing to balance. Also keeps 2 * n_sts - 1 below from wrapping. */
+  if (n_sts == 0)
+    {
+      t->sub_tunnels_lbv = 0;
+      t->sub_tunnels_lbv_count = 0;
+      t->norm_sub_tunnel_weights = 0;
+      return;
+    }
+
+  /* The second half of the vector is scratch space for the input weights */
+  vec_validate(nsts, 2 * n_sts - 1);
+
+  sum_weight = 0;
+  for (i = 0; i < n_sts; i++)
+    {
+      /* Find total weight to normalize weights. */
+      sum_weight += sts[i].weight;
+
+      /* build normalized sub tunnels vector */
+      nsts[i].weight = sts[i].weight;
+      nsts[i].sub_tunnel_index = i;
+    }
+
+  n_nsts = n_sts;
+  if (n_sts == 1)
+    {
+      nsts[0].weight = 1;
+      _vec_len(nsts) = 1;
+      goto build_lbv;
+    }
+
+  /* All weights zero: normalizing would divide by zero and the search
+   * below would never converge, so fall back to an equal split. */
+  if (sum_weight == 0)
+    {
+      for (i = 0; i < n_sts; i++)
+        nsts[i].weight = 1;
+      sum_weight = n_sts;
+    }
+
+  /* Sort sub-tunnels by weight. The element size must be that of a whole
+   * vector entry, not of one of its fields. */
+  qsort (nsts, n_nsts, sizeof (nsts[0]), (void *) weight_cmp);
+
+  /* Save copies of all sub-tunnel weights to avoid being overwritten in
+   * loop below. */
+  for (i = 0; i < n_nsts; i++)
+    nsts[n_nsts + i].weight = nsts[i].weight;
+
+  /* Try larger and larger power of 2 sized blocks until we
+     find one where traffic flows to within 1% of specified weights. */
+  for (n_nsts = max_pow2 (n_sts); ; n_nsts *= 2)
+    {
+      error = 0;
+
+      norm = n_nsts / sum_weight;
+      n_nsts_left = n_nsts;
+      for (i = 0; i < n_sts; i++)
+        {
+          f64 nf = nsts[n_sts + i].weight * norm;
+          word n = flt_round_nearest (nf);
+
+          /* never hand out more slots than remain in the block */
+          n = n > n_nsts_left ? n_nsts_left : n;
+          n_nsts_left -= n;
+          error += fabs (nf - n);
+          nsts[i].weight = n;
+        }
+
+      /* slots lost to rounding go to the heaviest sub-tunnel */
+      nsts[0].weight += n_nsts_left;
+
+      /* Less than 1% average error per slot with this block size? */
+      if (error <= tolerance * n_nsts)
+        {
+          /* Drop the scratch copies; i == n_sts here. */
+          _vec_len (nsts) = i;
+          break;
+        }
+    }
+
+ build_lbv:
+
+  /* build load balancing vector: each sub-tunnel index is repeated as many
+   * times as its normalized weight */
+  vec_foreach (stp, nsts)
+    {
+      for (i = 0; i < stp[0].weight; i++)
+        vec_add1(st_lbv, stp[0].sub_tunnel_index);
+    }
+
+  t->sub_tunnels_lbv = st_lbv;
+  t->sub_tunnels_lbv_count = n_nsts;
+  t->norm_sub_tunnel_weights = nsts;
+}
+
+/* Populate t->sub_tunnels with one sub-tunnel per entry of
+ * t->locator_pairs, then derive the tunnel's load balancing vector from
+ * the sub-tunnel weights. */
+static void
+create_sub_tunnels (lisp_gpe_main_t * lgm, lisp_gpe_tunnel_t * t)
+{
+  lisp_gpe_sub_tunnel_t sub_tun;
+  locator_pair_t * pair;
+
+  vec_foreach (pair, t->locator_pairs)
+    {
+      sub_tun.locator_pair_index = pair - t->locator_pairs;
+      sub_tun.parent_index = t - lgm->tunnels;
+      sub_tun.weight = pair->weight;
+
+      /* fills in the rewrite string and the is_ip4 flag of sub_tun */
+      lisp_gpe_rewrite (t, &sub_tun, pair);
+      vec_add1 (t->sub_tunnels, sub_tun);
+    }
+
+  /* normalize weights and compute sub-tunnel load balancing vector */
+  compute_sub_tunnels_balancing_vector (t);
+}
+
#define foreach_copy_field \
_(encap_fib_index) \
_(decap_fib_index) \
@@ -105,8 +230,8 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2,
lisp_gpe_main_t * lgm = &lisp_gpe_main;
lisp_gpe_tunnel_t *t = 0;
lisp_gpe_tunnel_key_t key;
+ lisp_gpe_sub_tunnel_t * stp = 0;
uword * p;
- int rv;
/* prepare tunnel key */
memset(&key, 0, sizeof(key));
@@ -138,12 +263,7 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2,
foreach_copy_field;
#undef _
- /* TODO multihoming */
- if (!a->is_negative)
- {
- ip_address_copy (&t->src, &a->locator_pairs[0].lcl_loc);
- ip_address_copy (&t->dst, &a->locator_pairs[0].rmt_loc);
- }
+ t->locator_pairs = vec_dup(a->locator_pairs);
/* if vni is non-default */
if (a->vni)
@@ -159,14 +279,9 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2,
else
t->next_protocol = LISP_GPE_NEXT_PROTO_ETHERNET;
- /* compute rewrite */
- rv = lisp_gpe_rewrite (t);
-
- if (rv)
- {
- pool_put(lgm->tunnels, t);
- return rv;
- }
+ /* build sub-tunnels for lowest priority locator-pairs */
+ if (!a->is_negative)
+ create_sub_tunnels (lgm, t);
mhash_set(&lgm->lisp_gpe_tunnel_by_key, &key, t - lgm->tunnels, 0);
@@ -188,7 +303,13 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2,
mhash_unset(&lgm->lisp_gpe_tunnel_by_key, &key, 0);
- vec_free(t->rewrite);
+      /* free the rewrite string of every sub-tunnel before the vector
+       * that holds the sub-tunnels themselves */
+      vec_foreach(stp, t->sub_tunnels)
+      {
+        vec_free(stp->rewrite);
+      }
+      vec_free(t->sub_tunnels);
+      vec_free(t->sub_tunnels_lbv);
+      /* allocated together with the load balancing vector; must be
+       * released here or it is leaked when the tunnel goes away */
+      vec_free(t->norm_sub_tunnel_weights);
+      vec_free(t->locator_pairs);
pool_put(lgm->tunnels, t);
}
@@ -197,8 +318,8 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2,
static int
build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id,
- u32 vni, u32 tun_index, u8 is_negative, u8 action,
- u8 ip_ver)
+ u32 vni, u32 tun_index, u32 n_sub_tun, u8 is_negative,
+ u8 action, u8 ip_ver)
{
uword * lookup_next_index, * lgpe_sw_if_index, * lnip;
@@ -222,14 +343,15 @@ build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id,
/* the assumption is that the interface must've been created before
* programming the dp */
- ASSERT(lookup_next_index != 0);
- ASSERT(lgpe_sw_if_index != 0);
+ ASSERT(lookup_next_index != 0 && lgpe_sw_if_index != 0);
- /* hijack explicit fib index to store lisp interface node index and
- * if_address_index for the tunnel index */
+ /* hijack explicit fib index to store lisp interface node index,
+ * if_address_index for the tunnel index and saved lookup next index
+ * for the number of sub tunnels */
adj->explicit_fib_index = lookup_next_index[0];
adj->if_address_index = tun_index;
adj->rewrite_header.sw_if_index = lgpe_sw_if_index[0];
+ adj->saved_lookup_next_index = n_sub_tun;
}
/* negative mapping */
else
@@ -268,7 +390,8 @@ add_del_ip_fwd_entry (lisp_gpe_main_t * lgm,
vnet_lisp_gpe_add_del_fwd_entry_args_t * a)
{
ip_adjacency_t adj, * adjp;
- u32 rv, tun_index = ~0;
+ lisp_gpe_tunnel_t * t;
+ u32 rv, tun_index = ~0, n_sub_tuns = 0;
ip_prefix_t * rmt_pref, * lcl_pref;
u8 ip_ver;
@@ -285,11 +408,17 @@ add_del_ip_fwd_entry (lisp_gpe_main_t * lgm,
clib_warning ("failed to build tunnel!");
return rv;
}
+ if (a->is_add)
+ {
+ t = pool_elt_at_index(lgm->tunnels, tun_index);
+ n_sub_tuns = t->sub_tunnels_lbv_count;
+ }
}
/* setup adjacency for eid */
rv = build_ip_adjacency (lgm, &adj, a->table_id, a->vni, tun_index,
- a->is_negative, a->action, ip_ver);
+ n_sub_tuns, a->is_negative, a->action,
+ ip_ver);
/* add/delete route for eid */
rv |= ip_sd_fib_add_del_route (lgm, rmt_pref, lcl_pref, a->table_id, &adj,
@@ -321,8 +450,8 @@ static void
make_mac_fib_key (BVT(clib_bihash_kv) *kv, u16 bd_index, u8 src_mac[6],
u8 dst_mac[6])
{
- kv->key[0] = (((u64) bd_index) << 48) | (((u64 *)dst_mac)[0] & MAC_BIT_MASK);
- kv->key[1] = ((u64 *)src_mac)[0] & MAC_BIT_MASK;
+ kv->key[0] = (((u64) bd_index) << 48) | mac_to_u64(dst_mac);
+ kv->key[1] = mac_to_u64(src_mac);
kv->key[2] = 0;
}
@@ -593,18 +722,15 @@ u8 *
format_lisp_gpe_tunnel (u8 * s, va_list * args)
{
lisp_gpe_tunnel_t * t = va_arg (*args, lisp_gpe_tunnel_t *);
- lisp_gpe_main_t * lgm = &lisp_gpe_main;
+ lisp_gpe_main_t * lgm = vnet_lisp_gpe_get_main();
+ locator_pair_t * lp = 0;
+ normalized_sub_tunnel_weights_t * nstw;
- s = format (s,
- "[%d] %U (src) %U (dst) fibs: encap %d, decap %d",
- t - lgm->tunnels,
- format_ip_address, &t->src,
- format_ip_address, &t->dst,
- t->encap_fib_index,
- t->decap_fib_index);
-
- s = format (s, " decap next %U\n", format_decap_next, t->decap_next_index);
- s = format (s, "lisp ver %d ", (t->ver_res>>6));
+ s = format (s, "tunnel %d vni %d (0x%x)\n", t - lgm->tunnels, t->vni, t->vni);
+ s = format (s, " fibs: encap %d, decap %d decap next %U\n",
+ t->encap_fib_index, t->decap_fib_index, format_decap_next,
+ t->decap_next_index);
+ s = format (s, " lisp ver %d ", (t->ver_res>>6));
#define _(n,v) if (t->flags & v) s = format (s, "%s-bit ", #n);
foreach_lisp_gpe_flag_bit;
@@ -613,7 +739,21 @@ format_lisp_gpe_tunnel (u8 * s, va_list * args)
s = format (s, "next_protocol %d ver_res %x res %x\n",
t->next_protocol, t->ver_res, t->res);
- s = format (s, "iid %d (0x%x)\n", t->vni, t->vni);
+ s = format (s, " locator-pairs:\n");
+ vec_foreach(lp, t->locator_pairs)
+ {
+ s = format (s, " local: %U remote: %U weight %d\n",
+ format_ip_address, &lp->lcl_loc, format_ip_address,
+ &lp->rmt_loc, lp->weight);
+ }
+
+ s = format (s, " active sub-tunnels:\n");
+ vec_foreach(nstw, t->norm_sub_tunnel_weights)
+ {
+ lp = vec_elt_at_index(t->locator_pairs, nstw->sub_tunnel_index);
+ s = format (s, " local: %U remote: %U weight %d\n", format_ip_address,
+ &lp->lcl_loc, format_ip_address, &lp->rmt_loc, nstw->weight);
+ }
return s;
}
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h
index f3e75772ee7..145b5d493e5 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe.h
+++ b/vnet/vnet/lisp-gpe/lisp_gpe.h
@@ -54,17 +54,45 @@ typedef struct
};
} lisp_gpe_tunnel_key_t;
-typedef struct
+/* One encapsulation path of a tunnel, built from a single locator pair */
+typedef struct lisp_gpe_sub_tunnel
{
/* Rewrite string. $$$$ embed vnet_rewrite header */
u8 * rewrite;
+ /* index of the owning tunnel in the tunnel pool */
+ u32 parent_index;
+ /* index of the source locator pair in the owning tunnel's locator_pairs */
+ u32 locator_pair_index;
+ /* weight copied from the locator pair; input to load balancing */
+ u8 weight;
+ /* non-zero if the rewrite is an IPv4 header, zero if it is IPv6 */
+ u8 is_ip4;
+} lisp_gpe_sub_tunnel_t;
+
+/* Weight of one sub-tunnel after normalization to the load balancing
+ * vector size.
+ * NOTE(review): the struct tag is spelled 'nomalized'; presumably
+ * 'normalized' was intended - confirm nothing refers to the tag before
+ * renaming. */
+typedef struct nomalized_sub_tunnel
+{
+ /* index of the sub-tunnel in the tunnel's sub_tunnels vector */
+ u32 sub_tunnel_index;
+ /* number of load balancing vector entries that select this sub-tunnel */
+ u8 weight;
+} normalized_sub_tunnel_weights_t;
+
+typedef struct
+{
+ /* tunnel src and dst addresses */
+ locator_pair_t * locator_pairs;
+
+ /* locator-pairs with best priority become sub-tunnels */
+ lisp_gpe_sub_tunnel_t * sub_tunnels;
+
+ /* sub-tunnels load balancing vector: contains list of sub-tunnel
+ * indexes replicated according to weight */
+ u32 * sub_tunnels_lbv;
+
+ /* number of entries in load balancing vector */
+ u32 sub_tunnels_lbv_count;
+
+ /* normalized sub tunnel weights */
+ normalized_sub_tunnel_weights_t * norm_sub_tunnel_weights;
/* decap next index */
u32 decap_next_index;
- /* tunnel src and dst addresses */
- ip_address_t src;
- ip_address_t dst;
+ /* TODO remove */
+ ip_address_t src, dst;
/* FIB indices */
u32 encap_fib_index; /* tunnel partner lookup here */