summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--vnet/vnet/vxlan/vxlan.c179
-rw-r--r--vnet/vnet/vxlan/vxlan.h16
2 files changed, 121 insertions, 74 deletions
diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c
index 894cc3dcbf2..abf0182f91f 100644
--- a/vnet/vnet/vxlan/vxlan.c
+++ b/vnet/vnet/vxlan/vxlan.c
@@ -289,14 +289,72 @@ static int vxlan_check_decap_next(vxlan_main_t * vxm, u32 is_ip6, u32 decap_next
return 0;
}
+/*
+ * Take one reference on a VTEP address and return the resulting count.
+ *
+ * Only ip4 addresses are counted, in vxlan_main.vtep4 keyed on the ip4
+ * address.  The first reference creates the entry with a count of 1.
+ * Any other address is not counted and always reports 1.
+ */
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+  uword *refcount;
+
+  if (!ip46_address_is_ip4(ip))
+    return 1; /* always create */
+
+  refcount = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+  if (!refcount)
+    {
+      /* first user of this address */
+      hash_set (vxlan_main.vtep4, ip->ip4.as_u32, 1);
+      return 1;
+    }
+
+  refcount[0] += 1;
+  return refcount[0];
+}
+
+/*
+ * Drop one reference on a VTEP address and return the number of
+ * references that remain.
+ *
+ * Only ip4 addresses are counted, in vxlan_main.vtep4.  For any other
+ * address 0 is returned, so the caller always tears down.  The ip4 entry
+ * must already exist (asserted); it is removed from the table when its
+ * count reaches zero.
+ */
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+  if (!ip46_address_is_ip4(ip))
+    return 0; /* always destroy */
+  uword *pvtep = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+  ASSERT(pvtep);
+  /* Capture the new count before the entry can be removed: pvtep points
+   * into the table's storage and must not be read after hash_unset. */
+  uword refs = --pvtep[0];
+  if (refs == 0)
+    hash_unset (vxlan_main.vtep4, ip->ip4.as_u32);
+  return refs;
+}
+
+/*
+ * Return the shared multicast endpoint record for the group address ip.
+ *
+ * The record is looked up in vxlan_main.mcast_ep_by_ip and the result
+ * points into the vxlan_main.mcast_eps pool.  Asserts that ip is a
+ * multicast address and that a record for it exists.
+ */
+static mcast_remote_t *
+mcast_ep_get(ip46_address_t * ip)
+{
+  uword * slot;
+
+  ASSERT(ip46_address_is_multicast(ip));
+  slot = hash_get_mem (vxlan_main.mcast_ep_by_ip, ip);
+  ASSERT(slot);
+  return pool_elt_at_index(vxlan_main.mcast_eps, slot[0]);
+}
+
+/*
+ * Store a copy of *new_ep in the vxlan_main.mcast_eps pool and index it
+ * by its ip in vxlan_main.mcast_ep_by_ip.
+ *
+ * The hash key is the address of the ip field inside the pool element,
+ * and the hash value is the element's index in the pool.
+ *
+ * NOTE(review): the key pointer refers into the pool's own storage --
+ * confirm that a later pool_get cannot relocate elements while their
+ * keys are still registered in the hash.
+ */
+static void
+mcast_ep_add(mcast_remote_t * new_ep)
+{
+  mcast_remote_t * slot;
+  uword slot_index;
+
+  pool_get_aligned (vxlan_main.mcast_eps, slot, CLIB_CACHE_LINE_BYTES);
+  slot[0] = new_ep[0];
+  slot_index = slot - vxlan_main.mcast_eps;
+  hash_set_mem (vxlan_main.mcast_ep_by_ip, &slot->ip, slot_index);
+}
+
+/*
+ * Unregister a multicast endpoint record and give its element back to
+ * the vxlan_main.mcast_eps pool.
+ *
+ * The hash entry is removed first, while ep->ip is still usable as the
+ * lookup key; only then is the element released.
+ */
+static void
+mcast_ep_remove(mcast_remote_t * ep)
+{
+  ip46_address_t * key = &ep->ip;
+
+  hash_unset_mem (vxlan_main.mcast_ep_by_ip, key);
+  pool_put (vxlan_main.mcast_eps, ep);
+}
+
+/*
+ * Fill ethernet_address with the multicast MAC for the group address ip,
+ * dispatching to the ip4 or ip6 helper according to the address family.
+ *
+ * NOTE(review): the ip6 branch passes the first 32-bit word of the
+ * address (as_u32[0]) -- confirm this is the word the ip6 helper expects.
+ */
+static void
+ip46_multicast_ethernet_address(u8 * ethernet_address, ip46_address_t * ip)
+{
+  if (!ip46_address_is_ip4(ip))
+    {
+      ip6_multicast_ethernet_address(ethernet_address, ip->ip6.as_u32[0]);
+      return;
+    }
+
+  ip4_multicast_ethernet_address(ethernet_address, &ip->ip4);
+}
+
int vnet_vxlan_add_del_tunnel
(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
{
vxlan_main_t * vxm = &vxlan_main;
vxlan_tunnel_t *t = 0;
vnet_main_t * vnm = vxm->vnet_main;
- vnet_hw_interface_t * hi;
- uword * p, * pvtep;
+ uword * p;
u32 hw_if_index = ~0;
u32 sw_if_index = ~0;
int rv;
@@ -309,10 +367,6 @@ int vnet_vxlan_add_del_tunnel
key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
key4.vni = clib_host_to_net_u32 (a->vni << 8);
p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
- if (ip4_address_is_multicast (&a->dst.ip4))
- pvtep = hash_get (vxm->vtep4, a->dst.ip4.as_u32);
- else
- pvtep = hash_get (vxm->vtep4, a->src.ip4.as_u32);
}
else
{
@@ -320,7 +374,6 @@ int vnet_vxlan_add_del_tunnel
key6.src.as_u64[1] = a->dst.ip6.as_u64[1];
key6.vni = clib_host_to_net_u32 (a->vni << 8);
p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6);
- pvtep = NULL; /* ip6 vxlan-bypass not yet implemented */
}
if (a->is_add)
@@ -368,21 +421,11 @@ int vnet_vxlan_add_del_tunnel
}
if (!is_ip6)
- {
- hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
- if (pvtep)
- pvtep[0]++;
- else
- {
- if (ip4_address_is_multicast (&a->dst.ip4))
- hash_set (vxm->vtep4, a->dst.ip4.as_u32, 1);
- else
- hash_set (vxm->vtep4, a->src.ip4.as_u32, 1);
- }
- }
+ hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
else
hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels);
-
+
+ vnet_hw_interface_t * hi;
if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0)
{
vnet_interface_main_t * im = &vnm->interface_main;
@@ -441,6 +484,7 @@ int vnet_vxlan_add_del_tunnel
* when the forwarding for the entry updates, and the tunnel can
* re-stack accordingly
*/
+ vtep_addr_ref(&t->src);
t->fib_entry_index = fib_table_entry_special_add
(t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR,
FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
@@ -448,41 +492,41 @@ int vnet_vxlan_add_del_tunnel
(t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
vxlan_tunnel_restack_dpo(t);
}
- else if (pvtep == NULL)
+ else
{
/* Multicast tunnel -
* as the same mcast group can be used for mutiple mcast tunnels
* with different VNIs, create the output fib adjecency only if
* it does not already exist
*/
- fib_protocol_t fp;
- u8 mcast_mac[6];
- if (!is_ip6) {
- ip4_multicast_ethernet_address(mcast_mac, &t->dst.ip4);
- fp = FIB_PROTOCOL_IP4;
- } else {
- ip6_multicast_ethernet_address(mcast_mac, t->dst.ip6.as_u32[0]);
- fp = FIB_PROTOCOL_IP6;
- }
- t->mcast_adj_index = adj_rewrite_add_and_lock
- (fp, fib_proto_to_link(fp), t->mcast_sw_if_index, mcast_mac);
-
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
-
- /* Stack mcast dst mac addr rewrite on encap */
- dpo_proto_t dproto = fib_proto_to_dpo(fp);
+ fib_protocol_t fp = (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
dpo_id_t dpo = DPO_INVALID;
+ dpo_proto_t dproto = fib_proto_to_dpo(fp);
- dpo_set (&dpo, DPO_ADJACENCY, dproto, t->mcast_adj_index);
+ if (vtep_addr_ref(&t->dst) == 1)
+ {
+ u8 mcast_mac[6];
+
+ ip46_multicast_ethernet_address(mcast_mac, &t->dst);
+ receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
+ mcast_remote_t new_ep = {
+ .ip = t->dst,
+ .mcast_adj_index = adj_rewrite_add_and_lock
+ (fp, fib_proto_to_link(fp), a->mcast_sw_if_index, mcast_mac),
+ /* Add VRF local mcast adj. */
+ .fib_entry_index = fib_table_entry_special_dpo_add
+ (t->encap_fib_index, &tun_dst_pfx,
+ FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo)
+ };
+ mcast_ep_add(&new_ep);
+ dpo_reset(&dpo);
+ }
+ /* Stack shared mcast dst mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY, dproto,
+ mcast_ep_get(&t->dst)->mcast_adj_index);
dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset(&dpo);
-
- /* Add local mcast adj. */
- receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
- t->fib_entry_index = fib_table_entry_special_dpo_add
- (t->encap_fib_index, &tun_dst_pfx,
- FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo);
- dpo_reset(&dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
}
/* Set vxlan tunnel output node */
@@ -505,39 +549,29 @@ int vnet_vxlan_add_del_tunnel
vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+ if (!is_ip6)
+ hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+ else
+ {
+ hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
+ clib_mem_free (t->key6);
+ }
+
if (!ip46_address_is_multicast(&t->dst))
{
+ vtep_addr_unref(&a->src);
fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
- fib_node_deinit(&t->node);
}
- else if (pvtep == NULL || pvtep[0] == 1)
+ else if (vtep_addr_unref(&t->dst) == 0)
{
- adj_unlock(t->mcast_adj_index);
- fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_SPECIAL);
- fib_node_deinit(&t->node);
+ mcast_remote_t* ep = mcast_ep_get(&t->dst);
+ adj_unlock(ep->mcast_adj_index);
+ fib_table_entry_delete_index(ep->fib_entry_index, FIB_SOURCE_SPECIAL);
+ mcast_ep_remove(ep);
}
- if (!is_ip6)
- {
- hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
- if (pvtep)
- {
- pvtep[0]--;
- if (pvtep[0] == 0)
- {
- if (ip4_address_is_multicast (&a->dst.ip4))
- hash_unset (vxm->vtep4, a->dst.ip4.as_u32);
- else
- hash_unset (vxm->vtep4, a->src.ip4.as_u32);
- }
- }
- }
- else
- {
- hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
- clib_mem_free (t->key6);
- }
+ fib_node_deinit(&t->node);
vec_free (t->rewrite);
pool_put (vxm->tunnels, t);
}
@@ -848,6 +882,9 @@ clib_error_t *vxlan_init (vlib_main_t *vm)
vxm->vxlan6_tunnel_by_key = hash_create_mem(0,
sizeof(vxlan6_tunnel_key_t),
sizeof(uword));
+ vxm->mcast_ep_by_ip = hash_create_mem(0,
+ sizeof(ip46_address_t),
+ sizeof(uword));
udp_register_dst_port (vm, UDP_DST_PORT_vxlan,
vxlan4_input_node.index, /* is_ip4 */ 1);
diff --git a/vnet/vnet/vxlan/vxlan.h b/vnet/vnet/vxlan/vxlan.h
index f475bbacf9d..7605d94aa98 100644
--- a/vnet/vnet/vxlan/vxlan.h
+++ b/vnet/vnet/vxlan/vxlan.h
@@ -138,18 +138,28 @@ typedef enum {
} vxlan_input_error_t;
typedef struct {
+ ip46_address_t ip; /* multicast group address; also the key in mcast_ep_by_ip */
+ fib_node_index_t fib_entry_index; /* special FIB entry added for the group, deleted with the endpoint */
+ adj_index_t mcast_adj_index; /* locked mcast rewrite adjacency, unlocked with the endpoint */
+} mcast_remote_t;
+
+typedef struct {
/* vector of encap tunnel instances */
- vxlan_tunnel_t *tunnels;
+ vxlan_tunnel_t * tunnels;
/* lookup tunnel by key */
uword * vxlan4_tunnel_by_key; /* keyed on ipv4.dst + vni */
uword * vxlan6_tunnel_by_key; /* keyed on ipv6.dst + vni */
- /* local VTEP IPs used by vxlan-bypass node to check if received
- VXLAN packet DIP matches any local VTEP address */
+ /* local VTEP IPs ref count used by vxlan-bypass node to check if
+ received VXLAN packet DIP matches any local VTEP address */
uword * vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
uword * vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+ /* set of active remote mcast VTEP */
+ mcast_remote_t * mcast_eps;
+ uword * mcast_ep_by_ip; /* mcast VTEPs keyed on their ip46 addr */
+
/* Free vlib hw_if_indices */
u32 * free_vxlan_tunnel_hw_if_indices;