aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEyal Bari <ebari@cisco.com>2016-12-21 12:45:47 +0200
committerDamjan Marion <dmarion.lists@gmail.com>2016-12-21 17:50:42 +0000
commitfff67c89c5135263989c48b9ed923eb785efa67e (patch)
treebaa33d7f49c49cbe18b8937b746155b102118d26
parent4348c85b3da576936dd99e0ac6b56921dd7fd9e9 (diff)
Support multiple VXLAN tunnels with same mcast dst
Support multiple vni's with the same group(multicast) address. Implemented by sharing local adj. and rewrite among tunnels. Change-Id: I8ca036b87af65dd6a8f1aff7da1855f72b4c8f4b Signed-off-by: Eyal Bari <ebari@cisco.com>
-rw-r--r--vnet/vnet/vxlan/vxlan.c179
-rw-r--r--vnet/vnet/vxlan/vxlan.h16
2 files changed, 121 insertions, 74 deletions
diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c
index 894cc3dcbf2..abf0182f91f 100644
--- a/vnet/vnet/vxlan/vxlan.c
+++ b/vnet/vnet/vxlan/vxlan.c
@@ -289,14 +289,72 @@ static int vxlan_check_decap_next(vxlan_main_t * vxm, u32 is_ip6, u32 decap_next
return 0;
}
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+ if (!ip46_address_is_ip4(ip))
+ return 1; /* always create */
+ uword *pvtep = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+ if (pvtep)
+ return ++pvtep[0];
+ hash_set (vxlan_main.vtep4, ip->ip4.as_u32, 1);
+ return 1;
+}
+
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+ if (!ip46_address_is_ip4(ip))
+ return 0; /* alwways destroy */
+ uword *pvtep = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+ ASSERT(pvtep);
+ if (!(--pvtep[0]))
+ hash_unset (vxlan_main.vtep4, ip->ip4.as_u32);
+ return pvtep[0];
+}
+
+static
+mcast_remote_t *
+mcast_ep_get(ip46_address_t * ip)
+{
+ ASSERT(ip46_address_is_multicast(ip));
+ uword * ep_idx = hash_get_mem (vxlan_main.mcast_ep_by_ip, ip);
+ ASSERT(ep_idx);
+ return pool_elt_at_index(vxlan_main.mcast_eps, *ep_idx);
+}
+
+static void
+mcast_ep_add(mcast_remote_t * new_ep)
+{
+ mcast_remote_t * ep;
+
+ pool_get_aligned (vxlan_main.mcast_eps, ep, CLIB_CACHE_LINE_BYTES);
+ *ep = *new_ep;
+ hash_set_mem (vxlan_main.mcast_ep_by_ip, &ep->ip, ep - vxlan_main.mcast_eps);
+}
+
+static void
+mcast_ep_remove(mcast_remote_t * ep)
+{
+ hash_unset_mem (vxlan_main.mcast_ep_by_ip, &ep->ip);
+ pool_put (vxlan_main.mcast_eps, ep);
+}
+
+static void
+ip46_multicast_ethernet_address(u8 * ethernet_address, ip46_address_t * ip) {
+ if (ip46_address_is_ip4(ip))
+ ip4_multicast_ethernet_address(ethernet_address, &ip->ip4);
+ else
+ ip6_multicast_ethernet_address(ethernet_address, ip->ip6.as_u32[0]);
+}
+
int vnet_vxlan_add_del_tunnel
(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
{
vxlan_main_t * vxm = &vxlan_main;
vxlan_tunnel_t *t = 0;
vnet_main_t * vnm = vxm->vnet_main;
- vnet_hw_interface_t * hi;
- uword * p, * pvtep;
+ uword * p;
u32 hw_if_index = ~0;
u32 sw_if_index = ~0;
int rv;
@@ -309,10 +367,6 @@ int vnet_vxlan_add_del_tunnel
key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
key4.vni = clib_host_to_net_u32 (a->vni << 8);
p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
- if (ip4_address_is_multicast (&a->dst.ip4))
- pvtep = hash_get (vxm->vtep4, a->dst.ip4.as_u32);
- else
- pvtep = hash_get (vxm->vtep4, a->src.ip4.as_u32);
}
else
{
@@ -320,7 +374,6 @@ int vnet_vxlan_add_del_tunnel
key6.src.as_u64[1] = a->dst.ip6.as_u64[1];
key6.vni = clib_host_to_net_u32 (a->vni << 8);
p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6);
- pvtep = NULL; /* ip6 vxlan-bypass not yet implemented */
}
if (a->is_add)
@@ -368,21 +421,11 @@ int vnet_vxlan_add_del_tunnel
}
if (!is_ip6)
- {
- hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
- if (pvtep)
- pvtep[0]++;
- else
- {
- if (ip4_address_is_multicast (&a->dst.ip4))
- hash_set (vxm->vtep4, a->dst.ip4.as_u32, 1);
- else
- hash_set (vxm->vtep4, a->src.ip4.as_u32, 1);
- }
- }
+ hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
else
hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels);
-
+
+ vnet_hw_interface_t * hi;
if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0)
{
vnet_interface_main_t * im = &vnm->interface_main;
@@ -441,6 +484,7 @@ int vnet_vxlan_add_del_tunnel
* when the forwarding for the entry updates, and the tunnel can
* re-stack accordingly
*/
+ vtep_addr_ref(&t->src);
t->fib_entry_index = fib_table_entry_special_add
(t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR,
FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
@@ -448,41 +492,41 @@ int vnet_vxlan_add_del_tunnel
(t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
vxlan_tunnel_restack_dpo(t);
}
- else if (pvtep == NULL)
+ else
{
/* Multicast tunnel -
* as the same mcast group can be used for mutiple mcast tunnels
* with different VNIs, create the output fib adjecency only if
* it does not already exist
*/
- fib_protocol_t fp;
- u8 mcast_mac[6];
- if (!is_ip6) {
- ip4_multicast_ethernet_address(mcast_mac, &t->dst.ip4);
- fp = FIB_PROTOCOL_IP4;
- } else {
- ip6_multicast_ethernet_address(mcast_mac, t->dst.ip6.as_u32[0]);
- fp = FIB_PROTOCOL_IP6;
- }
- t->mcast_adj_index = adj_rewrite_add_and_lock
- (fp, fib_proto_to_link(fp), t->mcast_sw_if_index, mcast_mac);
-
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
-
- /* Stack mcast dst mac addr rewrite on encap */
- dpo_proto_t dproto = fib_proto_to_dpo(fp);
+ fib_protocol_t fp = (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
dpo_id_t dpo = DPO_INVALID;
+ dpo_proto_t dproto = fib_proto_to_dpo(fp);
- dpo_set (&dpo, DPO_ADJACENCY, dproto, t->mcast_adj_index);
+ if (vtep_addr_ref(&t->dst) == 1)
+ {
+ u8 mcast_mac[6];
+
+ ip46_multicast_ethernet_address(mcast_mac, &t->dst);
+ receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
+ mcast_remote_t new_ep = {
+ .ip = t->dst,
+ .mcast_adj_index = adj_rewrite_add_and_lock
+ (fp, fib_proto_to_link(fp), a->mcast_sw_if_index, mcast_mac),
+ /* Add VRF local mcast adj. */
+ .fib_entry_index = fib_table_entry_special_dpo_add
+ (t->encap_fib_index, &tun_dst_pfx,
+ FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo)
+ };
+ mcast_ep_add(&new_ep);
+ dpo_reset(&dpo);
+ }
+ /* Stack shared mcast dst mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY, dproto,
+ mcast_ep_get(&t->dst)->mcast_adj_index);
dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset(&dpo);
-
- /* Add local mcast adj. */
- receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
- t->fib_entry_index = fib_table_entry_special_dpo_add
- (t->encap_fib_index, &tun_dst_pfx,
- FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo);
- dpo_reset(&dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
}
/* Set vxlan tunnel output node */
@@ -505,39 +549,29 @@ int vnet_vxlan_add_del_tunnel
vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+ if (!is_ip6)
+ hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ else
+ {
+ hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
+ clib_mem_free (t->key6);
+ }
+
if (!ip46_address_is_multicast(&t->dst))
{
+ vtep_addr_unref(&a->src);
fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
- fib_node_deinit(&t->node);
}
- else if (pvtep == NULL || pvtep[0] == 1)
+ else if (vtep_addr_unref(&t->dst) == 0)
{
- adj_unlock(t->mcast_adj_index);
- fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_SPECIAL);
- fib_node_deinit(&t->node);
+ mcast_remote_t* ep = mcast_ep_get(&t->dst);
+ adj_unlock(ep->mcast_adj_index);
+ fib_table_entry_delete_index(ep->fib_entry_index, FIB_SOURCE_SPECIAL);
+ mcast_ep_remove(ep);
}
- if (!is_ip6)
- {
- hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
- if (pvtep)
- {
- pvtep[0]--;
- if (pvtep[0] == 0)
- {
- if (ip4_address_is_multicast (&a->dst.ip4))
- hash_unset (vxm->vtep4, a->dst.ip4.as_u32);
- else
- hash_unset (vxm->vtep4, a->src.ip4.as_u32);
- }
- }
- }
- else
- {
- hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
- clib_mem_free (t->key6);
- }
+ fib_node_deinit(&t->node);
vec_free (t->rewrite);
pool_put (vxm->tunnels, t);
}
@@ -848,6 +882,9 @@ clib_error_t *vxlan_init (vlib_main_t *vm)
vxm->vxlan6_tunnel_by_key = hash_create_mem(0,
sizeof(vxlan6_tunnel_key_t),
sizeof(uword));
+ vxm->mcast_ep_by_ip = hash_create_mem(0,
+ sizeof(ip46_address_t),
+ sizeof(uword));
udp_register_dst_port (vm, UDP_DST_PORT_vxlan,
vxlan4_input_node.index, /* is_ip4 */ 1);
diff --git a/vnet/vnet/vxlan/vxlan.h b/vnet/vnet/vxlan/vxlan.h
index f475bbacf9d..7605d94aa98 100644
--- a/vnet/vnet/vxlan/vxlan.h
+++ b/vnet/vnet/vxlan/vxlan.h
@@ -138,18 +138,28 @@ typedef enum {
} vxlan_input_error_t;
typedef struct {
+ ip46_address_t ip;
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+} mcast_remote_t;
+
+typedef struct {
/* vector of encap tunnel instances */
- vxlan_tunnel_t *tunnels;
+ vxlan_tunnel_t * tunnels;
/* lookup tunnel by key */
uword * vxlan4_tunnel_by_key; /* keyed on ipv4.dst + vni */
uword * vxlan6_tunnel_by_key; /* keyed on ipv6.dst + vni */
- /* local VTEP IPs used by vxlan-bypass node to check if received
- VXLAN packet DIP matches any local VTEP address */
+ /* local VTEP IPs ref count used by vxlan-bypass node to check if
+ received VXLAN packet DIP matches any local VTEP address */
uword * vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
uword * vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+ /* set of active remote mcast VTEP */
+ mcast_remote_t * mcast_eps;
+ uword * mcast_ep_by_ip; /* mcast VTEPs keyed on their ip46 addr */
+
/* Free vlib hw_if_indices */
u32 * free_vxlan_tunnel_hw_if_indices;