diff options
author | Mohsin Kazmi <sykazmi@cisco.com> | 2018-08-20 18:32:39 +0200 |
---|---|---|
committer | Neale Ranns <nranns@cisco.com> | 2018-09-10 12:38:30 +0000 |
commit | 61b94c6bc417dbcb11323962b40493e6f7ec2f7e (patch) | |
tree | aa10b1e25de448b4ea13eaf00c4f3e1e98833dc6 | |
parent | b414d0d23aff66f793c018a75d83af4d29fb1bd4 (diff) |
vxlan-gbp: Add support for vxlan gbp
This patch implements VXLAN tunnels with the Group Based Policy (GBP)
extension.
Change-Id: I70405bf7332c02867286da8958d9652837edd3c2
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
29 files changed, 3797 insertions, 38 deletions
diff --git a/src/plugins/gbp/gbp.api b/src/plugins/gbp/gbp.api index b2b32e90d4b..3e27abbb88a 100644 --- a/src/plugins/gbp/gbp.api +++ b/src/plugins/gbp/gbp.api @@ -24,7 +24,7 @@ option version = "1.0.0"; typeonly define gbp_endpoint { u32 sw_if_index; - u32 epg_id; + u16 epg_id; u8 is_ip6; u8 address[16]; u8 mac[6]; @@ -52,7 +52,7 @@ define gbp_endpoint_details typeonly define gbp_endpoint_group { - u32 epg_id; + u16 epg_id; u32 bd_id; u32 ip4_table_id; u32 ip6_table_id; @@ -81,8 +81,8 @@ define gbp_endpoint_group_details typeonly define gbp_recirc { - u32 epg_id; u32 sw_if_index; + u16 epg_id; u8 is_ext; }; @@ -110,7 +110,7 @@ typeonly define gbp_subnet { u32 table_id; u32 sw_if_index; - u32 epg_id; + u16 epg_id; u8 is_ip6; u8 is_internal; u8 address_length; @@ -139,8 +139,8 @@ define gbp_subnet_details typeonly define gbp_contract { - u32 src_epg; - u32 dst_epg; + u16 src_epg; + u16 dst_epg; u32 acl_index; }; diff --git a/src/plugins/gbp/gbp_api.c b/src/plugins/gbp/gbp_api.c index 1f24eedc3d0..f487695bdf3 100644 --- a/src/plugins/gbp/gbp_api.c +++ b/src/plugins/gbp/gbp_api.c @@ -93,7 +93,7 @@ vl_api_gbp_endpoint_add_del_t_handler (vl_api_gbp_endpoint_add_del_t * mp) if (mp->is_add) { rv = - gbp_endpoint_update (sw_if_index, &ip, ntohl (mp->endpoint.epg_id)); + gbp_endpoint_update (sw_if_index, &ip, ntohs (mp->endpoint.epg_id)); } else { @@ -137,7 +137,7 @@ gbp_endpoint_send_details (gbp_endpoint_t * gbpe, void *args) &gbpe->ge_key->gek_ip.ip4, sizeof (gbpe->ge_key->gek_ip.ip4)); - mp->endpoint.epg_id = ntohl (gbpe->ge_epg_id); + mp->endpoint.epg_id = ntohs (gbpe->ge_epg_id); vl_api_send_msg (ctx->reg, (u8 *) mp); @@ -175,7 +175,7 @@ static void if (mp->is_add) { - rv = gbp_endpoint_group_add (ntohl (mp->epg.epg_id), + rv = gbp_endpoint_group_add (ntohs (mp->epg.epg_id), ntohl (mp->epg.bd_id), ntohl (mp->epg.ip4_table_id), ntohl (mp->epg.ip6_table_id), @@ -183,7 +183,7 @@ static void } else { - gbp_endpoint_group_delete (ntohl (mp->epg.epg_id)); + 
gbp_endpoint_group_delete (ntohs (mp->epg.epg_id)); } BAD_SW_IF_INDEX_LABEL; @@ -211,7 +211,7 @@ vl_api_gbp_subnet_add_del_t_handler (vl_api_gbp_subnet_add_del_t * mp) rv = gbp_subnet_add_del (ntohl (mp->subnet.table_id), &pfx, ntohl (mp->subnet.sw_if_index), - ntohl (mp->subnet.epg_id), + ntohs (mp->subnet.epg_id), mp->is_add, mp->subnet.is_internal); REPLY_MACRO (VL_API_GBP_SUBNET_ADD_DEL_REPLY + GBP_MSG_BASE); @@ -237,7 +237,7 @@ gbp_subnet_send_details (u32 table_id, mp->subnet.is_internal = is_internal; mp->subnet.sw_if_index = ntohl (sw_if_index); - mp->subnet.epg_id = ntohl (epg); + mp->subnet.epg_id = ntohs (epg); mp->subnet.is_ip6 = (pfx->fp_proto == FIB_PROTOCOL_IP6); mp->subnet.address_length = pfx->fp_len; mp->subnet.table_id = ntohl (table_id); @@ -287,7 +287,7 @@ gbp_endpoint_group_send_details (gbp_endpoint_group_t * gepg, void *args) mp->context = ctx->context; mp->epg.uplink_sw_if_index = ntohl (gepg->gepg_uplink_sw_if_index); - mp->epg.epg_id = ntohl (gepg->gepg_id); + mp->epg.epg_id = ntohs (gepg->gepg_id); mp->epg.bd_id = ntohl (gepg->gepg_bd); mp->epg.ip4_table_id = ntohl (gepg->gepg_rd[FIB_PROTOCOL_IP4]); mp->epg.ip6_table_id = ntohl (gepg->gepg_rd[FIB_PROTOCOL_IP6]); @@ -328,7 +328,7 @@ vl_api_gbp_recirc_add_del_t_handler (vl_api_gbp_recirc_add_del_t * mp) if (mp->is_add) gbp_recirc_add (sw_if_index, - ntohl (mp->recirc.epg_id), mp->recirc.is_ext); + ntohs (mp->recirc.epg_id), mp->recirc.is_ext); else gbp_recirc_delete (sw_if_index); @@ -352,7 +352,7 @@ gbp_recirc_send_details (gbp_recirc_t * gr, void *args) mp->_vl_msg_id = ntohs (VL_API_GBP_RECIRC_DETAILS + GBP_MSG_BASE); mp->context = ctx->context; - mp->recirc.epg_id = ntohl (gr->gr_epg); + mp->recirc.epg_id = ntohs (gr->gr_epg); mp->recirc.sw_if_index = ntohl (gr->gr_sw_if_index); mp->recirc.is_ext = ntohl (gr->gr_is_ext); @@ -385,12 +385,12 @@ vl_api_gbp_contract_add_del_t_handler (vl_api_gbp_contract_add_del_t * mp) int rv = 0; if (mp->is_add) - gbp_contract_update (ntohl 
(mp->contract.src_epg), - ntohl (mp->contract.dst_epg), + gbp_contract_update (ntohs (mp->contract.src_epg), + ntohs (mp->contract.dst_epg), ntohl (mp->contract.acl_index)); else - gbp_contract_delete (ntohl (mp->contract.src_epg), - ntohl (mp->contract.dst_epg)); + gbp_contract_delete (ntohs (mp->contract.src_epg), + ntohs (mp->contract.dst_epg)); REPLY_MACRO (VL_API_GBP_CONTRACT_ADD_DEL_REPLY + GBP_MSG_BASE); } @@ -410,8 +410,8 @@ gbp_contract_send_details (gbp_contract_t * gbpc, void *args) mp->_vl_msg_id = ntohs (VL_API_GBP_CONTRACT_DETAILS + GBP_MSG_BASE); mp->context = ctx->context; - mp->contract.src_epg = ntohl (gbpc->gc_key.gck_src); - mp->contract.dst_epg = ntohl (gbpc->gc_key.gck_dst); + mp->contract.src_epg = ntohs (gbpc->gc_key.gck_src); + mp->contract.dst_epg = ntohs (gbpc->gc_key.gck_dst); mp->contract.acl_index = ntohl (gbpc->gc_value.gc_acl_index); vl_api_send_msg (ctx->reg, (u8 *) mp); diff --git a/src/plugins/gbp/gbp_classify.c b/src/plugins/gbp/gbp_classify.c index 6b91d8cf099..fdb1e9f6776 100644 --- a/src/plugins/gbp/gbp_classify.c +++ b/src/plugins/gbp/gbp_classify.c @@ -90,7 +90,7 @@ gbp_classify_inline (vlib_main_t * vm, if (GBP_SRC_CLASSIFY_NULL == type) { - src_epg = ~0; + src_epg = EPG_INVALID; next0 = vnet_l2_feature_next (b0, gscm->l2_input_feat_next[type], L2INPUT_FEAT_GBP_NULL_CLASSIFY); diff --git a/src/plugins/gbp/gbp_contract.c b/src/plugins/gbp/gbp_contract.c index a536f89e5fe..32dd94441db 100644 --- a/src/plugins/gbp/gbp_contract.c +++ b/src/plugins/gbp/gbp_contract.c @@ -45,7 +45,7 @@ gbp_contract_update (epg_id_t src_epg, epg_id_t dst_epg, u32 acl_index) gm->acl_plugin.register_user_module ("GBP ACL", "src-epg", "dst-epg"); } - p = hash_get (gbp_contract_db.gc_hash, key.as_u64); + p = hash_get (gbp_contract_db.gc_hash, key.as_u32); if (p != NULL) { value.as_u64 = p[0]; @@ -56,7 +56,7 @@ gbp_contract_update (epg_id_t src_epg, epg_id_t dst_epg, u32 acl_index) gm->acl_plugin.get_lookup_context_index (gm->gbp_acl_user_id, src_epg, 
dst_epg); value.gc_acl_index = acl_index; - hash_set (gbp_contract_db.gc_hash, key.as_u64, value.as_u64); + hash_set (gbp_contract_db.gc_hash, key.as_u32, value.as_u64); } if (value.gc_lc_index == ~0) @@ -77,13 +77,13 @@ gbp_contract_delete (epg_id_t src_epg, epg_id_t dst_epg) .gck_dst = dst_epg, }; - p = hash_get (gbp_contract_db.gc_hash, key.as_u64); + p = hash_get (gbp_contract_db.gc_hash, key.as_u32); if (p != NULL) { value.as_u64 = p[0]; gm->acl_plugin.put_lookup_context_index (value.gc_lc_index); } - hash_unset (gbp_contract_db.gc_hash, key.as_u64); + hash_unset (gbp_contract_db.gc_hash, key.as_u32); } void @@ -93,7 +93,7 @@ gbp_contract_walk (gbp_contract_cb_t cb, void *ctx) gbp_contract_value_t value; /* *INDENT-OFF* */ - hash_foreach(key.as_u64, value.as_u64, gbp_contract_db.gc_hash, + hash_foreach(key.as_u32, value.as_u64, gbp_contract_db.gc_hash, ({ gbp_contract_t gbpc = { .gc_key = key, @@ -174,7 +174,7 @@ gbp_contract_show (vlib_main_t * vm, vlib_cli_output (vm, "Contracts:"); /* *INDENT-OFF* */ - hash_foreach (key.as_u64, value.as_u64, gbp_contract_db.gc_hash, + hash_foreach (key.as_u32, value.as_u64, gbp_contract_db.gc_hash, { vlib_cli_output (vm, " {%d,%d} -> %d", key.gck_src, key.gck_dst, value.gc_acl_index); diff --git a/src/plugins/gbp/gbp_contract.h b/src/plugins/gbp/gbp_contract.h index 75f2edca116..121192c9c96 100644 --- a/src/plugins/gbp/gbp_contract.h +++ b/src/plugins/gbp/gbp_contract.h @@ -33,7 +33,7 @@ typedef struct gbp_contract_key_t_ epg_id_t gck_src; epg_id_t gck_dst; }; - u64 as_u64; + u32 as_u32; }; } gbp_contract_key_t; @@ -102,7 +102,7 @@ gbp_acl_lookup (gbp_contract_key_t * key) { uword *p; - p = hash_get (gbp_contract_db.gc_hash, key->as_u64); + p = hash_get (gbp_contract_db.gc_hash, key->as_u32); if (NULL != p) return (p[0]); diff --git a/src/plugins/gbp/gbp_policy.c b/src/plugins/gbp/gbp_policy.c index 38254644631..c0a8866e1ee 100644 --- a/src/plugins/gbp/gbp_policy.c +++ b/src/plugins/gbp/gbp_policy.c @@ -110,7 +110,7 @@ 
gbp_policy (vlib_main_t * vm, key0.gck_dst = gbp_port_to_epg (sw_if_index0); key0.gck_src = vnet_buffer2 (b0)->gbp.src_epg; - if (~0 != key0.gck_src) + if (EPG_INVALID != key0.gck_src) { if (PREDICT_FALSE (key0.gck_src == key0.gck_dst)) { diff --git a/src/plugins/gbp/gbp_policy_dpo.c b/src/plugins/gbp/gbp_policy_dpo.c index 4b8cd5bf6ad..3a06604121b 100644 --- a/src/plugins/gbp/gbp_policy_dpo.c +++ b/src/plugins/gbp/gbp_policy_dpo.c @@ -248,7 +248,7 @@ gbp_policy_dpo_inline (vlib_main_t * vm, key0.gck_src = vnet_buffer2 (b0)->gbp.src_epg; key0.gck_dst = gpd0->gpd_epg; - if (~0 != key0.gck_src) + if (EPG_INVALID != key0.gck_src) { if (PREDICT_FALSE (key0.gck_src == key0.gck_dst)) { diff --git a/src/plugins/gbp/gbp_subnet.c b/src/plugins/gbp/gbp_subnet.c index 3819a51595b..b3925110b7b 100644 --- a/src/plugins/gbp/gbp_subnet.c +++ b/src/plugins/gbp/gbp_subnet.c @@ -129,7 +129,7 @@ gbp_subnet_fib_table_walk (fib_node_index_t fei, void *arg) /* *INDENT-OFF* */ ctx->cb (table_id, pfx, ~0, // sw_if_index - ~0, // epg + EPG_INVALID, // epg 1, // is_internal ctx->ctx); /* *INDENT-ON* */ diff --git a/src/plugins/gbp/gbp_types.h b/src/plugins/gbp/gbp_types.h index fa337451028..afb17e3a5d5 100644 --- a/src/plugins/gbp/gbp_types.h +++ b/src/plugins/gbp/gbp_types.h @@ -18,8 +18,8 @@ #include <vnet/vnet.h> -typedef u32 epg_id_t; -#define EPG_INVALID (~0) +typedef u16 epg_id_t; +#define EPG_INVALID ((u16)~0) #endif diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 3dcb7efa799..de27a5b73d8 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -657,6 +657,24 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES mpls/mpls.api) ############################################################################## +# Tunnel protocol: vxlan-gbp +############################################################################## +list(APPEND VNET_SOURCES + vxlan-gbp/decap.c + vxlan-gbp/encap.c + vxlan-gbp/vxlan_gbp_api.c + vxlan-gbp/vxlan_gbp.c +) + +list(APPEND 
VNET_HEADERS + vxlan-gbp/vxlan_gbp.h + vxlan-gbp/vxlan_gbp_packet.h + vxlan-gbp/vxlan_gbp_error.def +) + +list(APPEND VNET_API_FILES vxlan-gbp/vxlan_gbp.api) + +############################################################################## # Tunnel protocol: vxlan-gpe ############################################################################## diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index e4167eae6ae..7e4387bf32b 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -364,7 +364,9 @@ typedef struct /* Group Based Policy */ struct { - u32 src_epg; + u8 __unused; + u8 flags; + u16 src_epg; } gbp; union diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h index 5578498c1ac..3a4ba8470bc 100644 --- a/src/vnet/fib/fib_node.h +++ b/src/vnet/fib/fib_node.h @@ -47,6 +47,7 @@ typedef enum fib_node_type_t_ { FIB_NODE_TYPE_UDP_ENCAP, FIB_NODE_TYPE_BIER_FMASK, FIB_NODE_TYPE_BIER_ENTRY, + FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, /** * Marker. New types before this one. leave the test last. 
*/ @@ -74,6 +75,7 @@ typedef enum fib_node_type_t_ { [FIB_NODE_TYPE_UDP_ENCAP] = "udp-encap", \ [FIB_NODE_TYPE_BIER_FMASK] = "bier-fmask", \ [FIB_NODE_TYPE_BIER_ENTRY] = "bier-entry", \ + [FIB_NODE_TYPE_VXLAN_GBP_TUNNEL] = "vxlan-gbp-tunnel" \ } /** diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h index 14d1288cff3..58b7c987ab7 100644 --- a/src/vnet/mfib/mfib_types.h +++ b/src/vnet/mfib/mfib_types.h @@ -172,6 +172,7 @@ typedef enum mfib_source_t_ MFIB_SOURCE_RR, MFIB_SOURCE_GENEVE, MFIB_SOURCE_IGMP, + MFIB_SOURCE_VXLAN_GBP, MFIB_SOURCE_DEFAULT_ROUTE, } mfib_source_t; @@ -188,6 +189,7 @@ typedef enum mfib_source_t_ [MFIB_SOURCE_RR] = "Recursive-resolution", \ [MFIB_SOURCE_GENEVE] = "Geneve", \ [MFIB_SOURCE_IGMP] = "IGMP", \ + [MFIB_SOURCE_VXLAN_GBP] = "VXLAN-GBP", \ [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ } diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h index abf0d686d0a..91d063a2f0c 100644 --- a/src/vnet/udp/udp.h +++ b/src/vnet/udp/udp.h @@ -57,6 +57,7 @@ _ (4500, ipsec) \ _ (4739, ipfix) \ _ (4789, vxlan) \ _ (4789, vxlan6) \ +_ (48879, vxlan_gbp) \ _ (4790, VXLAN_GPE) \ _ (6633, vpath_3) \ _ (6081, geneve) \ @@ -72,6 +73,7 @@ _ (3784, bfd6) \ _ (3785, bfd_echo6) \ _ (4341, lisp_gpe6) \ _ (4342, lisp_cp6) \ +_ (48879, vxlan6_gbp) \ _ (4790, VXLAN6_GPE) \ _ (6633, vpath6_3) \ _ (6081, geneve6) \ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index f07be653e20..157006b2643 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -75,6 +75,7 @@ #include <vnet/dhcp/dhcp6_pd_client_cp.api.h> #include <vnet/dhcp/dhcp6_ia_na_client_cp.api.h> #include <vnet/devices/pipe/pipe.api.h> +#include <vnet/vxlan-gbp/vxlan_gbp.api.h> /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/vxlan-gbp/decap.c b/src/vnet/vxlan-gbp/decap.c new file mode 100644 index 00000000000..0dc6b545c57 --- /dev/null +++ b/src/vnet/vxlan-gbp/decap.c @@ -0,0 +1,1020 @@ +/* + * decap.c: vxlan gbp tunnel decap packet 
processing + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/vxlan-gbp/vxlan_gbp.h> + +vlib_node_registration_t vxlan4_gbp_input_node; +vlib_node_registration_t vxlan6_gbp_input_node; + +typedef struct +{ + u32 next_index; + u32 tunnel_index; + u32 error; + u32 vni; + u16 sclass; +} vxlan_gbp_rx_trace_t; + +static u8 * +format_vxlan_gbp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vxlan_gbp_rx_trace_t *t = va_arg (*args, vxlan_gbp_rx_trace_t *); + + if (t->tunnel_index == ~0) + return format (s, + "VXLAN_GBP decap error - tunnel for vni %d does not exist", + t->vni); + return format (s, + "VXLAN_GBP decap from vxlan_gbp_tunnel%d vni %d sclass %d" + " next %d error %d", + t->tunnel_index, t->vni, t->sclass, t->next_index, t->error); +} + +always_inline u32 +buf_fib_index (vlib_buffer_t * b, u32 is_ip4) +{ + u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; + if (sw_if_index != (u32) ~ 0) + return sw_if_index; + + u32 *fib_index_by_sw_if_index = is_ip4 ? 
+ ip4_main.fib_index_by_sw_if_index : ip6_main.fib_index_by_sw_if_index; + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + + return vec_elt (fib_index_by_sw_if_index, sw_if_index); +} + +typedef vxlan4_gbp_tunnel_key_t last_tunnel_cache4; + +always_inline vxlan_gbp_tunnel_t * +vxlan4_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache4 * cache, + u32 fib_index, ip4_header_t * ip4_0, + vxlan_gbp_header_t * vxlan_gbp0, + vxlan_gbp_tunnel_t ** stats_t0) +{ + /* Make sure VXLAN_GBP tunnel exist according to packet SIP and VNI */ + vxlan4_gbp_tunnel_key_t key4; + key4.key[1] = ((u64) fib_index << 32) | vxlan_gbp0->vni_reserved; + + if (PREDICT_FALSE (key4.key[1] != cache->key[1] || + ip4_0->src_address.as_u32 != (u32) cache->key[0])) + { + key4.key[0] = ip4_0->src_address.as_u32; + int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, + &key4); + if (PREDICT_FALSE (rv != 0)) + return 0; + + *cache = key4; + } + vxlan_gbp_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value); + + /* Validate VXLAN_GBP tunnel SIP against packet DIP */ + if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32)) + *stats_t0 = t0; + else + { + /* try multicast */ + if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address))) + return 0; + + key4.key[0] = ip4_0->dst_address.as_u32; + /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */ + int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, + &key4); + if (PREDICT_FALSE (rv != 0)) + return 0; + + *stats_t0 = pool_elt_at_index (vxm->tunnels, key4.value); + } + + return t0; +} + +typedef vxlan6_gbp_tunnel_key_t last_tunnel_cache6; + +always_inline vxlan_gbp_tunnel_t * +vxlan6_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache6 * cache, + u32 fib_index, ip6_header_t * ip6_0, + vxlan_gbp_header_t * vxlan_gbp0, + vxlan_gbp_tunnel_t ** stats_t0) +{ + /* Make sure VXLAN_GBP tunnel exist according to packet SIP and VNI */ + vxlan6_gbp_tunnel_key_t key6 
= { + .key = { + [0] = ip6_0->src_address.as_u64[0], + [1] = ip6_0->src_address.as_u64[1], + [2] = (((u64) fib_index) << 32) | vxlan_gbp0->vni_reserved, + } + }; + + if (PREDICT_FALSE + (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0)) + { + int rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, + &key6); + if (PREDICT_FALSE (rv != 0)) + return 0; + + *cache = key6; + } + vxlan_gbp_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value); + + /* Validate VXLAN_GBP tunnel SIP against packet DIP */ + if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6))) + *stats_t0 = t0; + else + { + /* try multicast */ + if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address))) + return 0; + + /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */ + key6.key[0] = ip6_0->dst_address.as_u64[0]; + key6.key[1] = ip6_0->dst_address.as_u64[1]; + int rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, + &key6); + if (PREDICT_FALSE (rv != 0)) + return 0; + + *stats_t0 = pool_elt_at_index (vxm->tunnels, key6.value); + } + + return t0; +} + +always_inline uword +vxlan_gbp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u32 is_ip4) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vnet_main_t *vnm = vxm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + vlib_combined_counter_main_t *rx_counter = + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX; + vlib_combined_counter_main_t *drop_counter = + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP; + last_tunnel_cache4 last4; + last_tunnel_cache6 last6; + u32 pkts_decapsulated = 0; + u32 thread_index = vlib_get_thread_index (); + + if (is_ip4) + memset (&last4, 0xff, sizeof last4); + else + memset (&last6, 0xff, sizeof last6); + + u32 next_index = node->cached_next_index; + + u32 *from = vlib_frame_vector_args (from_frame); + u32 n_left_from = from_frame->n_vectors; + + while (n_left_from > 
0) + { + u32 *to_next, n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + u32 bi0 = to_next[0] = from[0]; + u32 bi1 = to_next[1] = from[1]; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + vlib_buffer_t *b0, *b1; + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* udp leaves current_data pointing at the vxlan_gbp header */ + void *cur0 = vlib_buffer_get_current (b0); + void *cur1 = vlib_buffer_get_current (b1); + vxlan_gbp_header_t *vxlan_gbp0 = cur0; + vxlan_gbp_header_t *vxlan_gbp1 = cur1; + + ip4_header_t *ip4_0, *ip4_1; + ip6_header_t *ip6_0, *ip6_1; + if (is_ip4) + { + ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t); + ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t); + } + else + { + ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t); + ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t); + } + + /* pop vxlan_gbp */ + vlib_buffer_advance (b0, sizeof *vxlan_gbp0); + vlib_buffer_advance (b1, sizeof *vxlan_gbp1); + + u32 fi0 = buf_fib_index (b0, is_ip4); + u32 fi1 = buf_fib_index (b1, is_ip4); + + vxlan_gbp_tunnel_t *t0, *stats_t0 = 0; + vxlan_gbp_tunnel_t *t1, *stats_t1 = 0; + if (is_ip4) + { + t0 = + vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0, + &stats_t0); + t1 = + vxlan4_gbp_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan_gbp1, + &stats_t1); + } + else + { + t0 = + vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0, + &stats_t0); + t1 = + vxlan6_gbp_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan_gbp1, + 
&stats_t1); + } + + u32 len0 = vlib_buffer_length_in_chain (vm, b0); + u32 len1 = vlib_buffer_length_in_chain (vm, b1); + + u32 next0, next1; + u8 error0 = 0, error1 = 0; + u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0); + u8 flags1 = vxlan_gbp_get_flags (vxlan_gbp1); + /* Validate VXLAN_GBP tunnel encap-fib index agaist packet */ + if (PREDICT_FALSE + (t0 == 0 || flags0 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G))) + { + next0 = VXLAN_GBP_INPUT_NEXT_DROP; + + if (t0 != 0 + && flags0 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G)) + { + error0 = VXLAN_GBP_ERROR_BAD_FLAGS; + vlib_increment_combined_counter + (drop_counter, thread_index, stats_t0->sw_if_index, 1, + len0); + } + else + error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL; + b0->error = node->errors[error0]; + } + else + { + next0 = t0->decap_next_index; + vnet_buffer2 (b0)->gbp.flags = + vxlan_gbp_get_gpflags (vxlan_gbp0); + vnet_buffer2 (b0)->gbp.src_epg = + vxlan_gbp_get_sclass (vxlan_gbp0); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + if (PREDICT_TRUE (next0 == VXLAN_GBP_INPUT_NEXT_L2_INPUT)) + vnet_update_l2_len (b0); + + /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index; + vlib_increment_combined_counter + (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0); + pkts_decapsulated++; + } + + /* Validate VXLAN_GBP tunnel encap-fib index agaist packet */ + if (PREDICT_FALSE + (t1 == 0 || flags1 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G))) + { + next1 = VXLAN_GBP_INPUT_NEXT_DROP; + + if (t1 != 0 + && flags1 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G)) + { + error1 = VXLAN_GBP_ERROR_BAD_FLAGS; + vlib_increment_combined_counter + (drop_counter, thread_index, stats_t1->sw_if_index, 1, + len1); + } + else + error1 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL; + b1->error = node->errors[error1]; + } + else + { + next1 = t1->decap_next_index; + vnet_buffer2 (b1)->gbp.flags = + vxlan_gbp_get_gpflags (vxlan_gbp1); + 
vnet_buffer2 (b1)->gbp.src_epg = + vxlan_gbp_get_sclass (vxlan_gbp1); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + if (PREDICT_TRUE (next1 == VXLAN_GBP_INPUT_NEXT_L2_INPUT)) + vnet_update_l2_len (b1); + + /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */ + vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index; + pkts_decapsulated++; + + vlib_increment_combined_counter + (rx_counter, thread_index, stats_t1->sw_if_index, 1, len1); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gbp_rx_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels; + tr->vni = vxlan_gbp_get_vni (vxlan_gbp0); + tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0); + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gbp_rx_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->next_index = next1; + tr->error = error1; + tr->tunnel_index = t1 == 0 ? 
~0 : t1 - vxm->tunnels; + tr->vni = vxlan_gbp_get_vni (vxlan_gbp1); + tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0 = to_next[0] = from[0]; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + + /* udp leaves current_data pointing at the vxlan_gbp header */ + void *cur0 = vlib_buffer_get_current (b0); + vxlan_gbp_header_t *vxlan_gbp0 = cur0; + ip4_header_t *ip4_0; + ip6_header_t *ip6_0; + if (is_ip4) + ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t); + else + ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t); + + /* pop (ip, udp, vxlan_gbp) */ + vlib_buffer_advance (b0, sizeof (*vxlan_gbp0)); + + u32 fi0 = buf_fib_index (b0, is_ip4); + + vxlan_gbp_tunnel_t *t0, *stats_t0 = 0; + if (is_ip4) + t0 = + vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0, + &stats_t0); + else + t0 = + vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0, + &stats_t0); + + uword len0 = vlib_buffer_length_in_chain (vm, b0); + + u32 next0; + u8 error0 = 0; + u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0); + /* Validate VXLAN_GBP tunnel encap-fib index agaist packet */ + if (PREDICT_FALSE + (t0 == 0 || flags0 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G))) + { + next0 = VXLAN_GBP_INPUT_NEXT_DROP; + + if (t0 != 0 + && flags0 != (VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G)) + { + error0 = VXLAN_GBP_ERROR_BAD_FLAGS; + vlib_increment_combined_counter + (drop_counter, thread_index, stats_t0->sw_if_index, 1, + len0); + } + else + error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL; + b0->error = node->errors[error0]; + } + else + { + next0 = t0->decap_next_index; + vnet_buffer2 (b0)->gbp.flags = + vxlan_gbp_get_gpflags (vxlan_gbp0); + vnet_buffer2 (b0)->gbp.src_epg = + vxlan_gbp_get_sclass (vxlan_gbp0); + + + /* Required to 
make the l2 tag push / pop code work on l2 subifs */ + if (PREDICT_TRUE (next0 == VXLAN_GBP_INPUT_NEXT_L2_INPUT)) + vnet_update_l2_len (b0); + + /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index; + pkts_decapsulated++; + + vlib_increment_combined_counter + (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gbp_rx_trace_t *tr + = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->error = error0; + tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels; + tr->vni = vxlan_gbp_get_vni (vxlan_gbp0); + tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + /* Do we still need this now that tunnel tx stats is kept? */ + u32 node_idx = + is_ip4 ? 
vxlan4_gbp_input_node.index : vxlan6_gbp_input_node.index; + vlib_node_increment_counter (vm, node_idx, VXLAN_GBP_ERROR_DECAPSULATED, + pkts_decapsulated); + + return from_frame->n_vectors; +} + +static uword +vxlan4_gbp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 1); +} + +static uword +vxlan6_gbp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 0); +} + +static char *vxlan_gbp_error_strings[] = { +#define vxlan_gbp_error(n,s) s, +#include <vnet/vxlan-gbp/vxlan_gbp_error.def> +#undef vxlan_gbp_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan4_gbp_input_node) = +{ + .function = vxlan4_gbp_input, + .name = "vxlan4-gbp-input", + .vector_size = sizeof (u32), + .n_errors = VXLAN_GBP_N_ERROR, + .error_strings = vxlan_gbp_error_strings, + .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT, + .format_trace = format_vxlan_gbp_rx_trace, + .next_nodes = { +#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n, + foreach_vxlan_gbp_input_next +#undef _ + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_gbp_input_node, vxlan4_gbp_input) + +VLIB_REGISTER_NODE (vxlan6_gbp_input_node) = +{ + .function = vxlan6_gbp_input, + .name = "vxlan6-gbp-input", + .vector_size = sizeof (u32), + .n_errors = VXLAN_GBP_N_ERROR, + .error_strings = vxlan_gbp_error_strings, + .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n, + foreach_vxlan_gbp_input_next +#undef _ + }, + .format_trace = format_vxlan_gbp_rx_trace, +}; +VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_gbp_input_node, vxlan6_gbp_input) +/* *INDENT-ON* */ + +typedef enum +{ + IP_VXLAN_GBP_BYPASS_NEXT_DROP, + IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP, + IP_VXLAN_GBP_BYPASS_N_NEXT, +} ip_vxan_gbp_bypass_next_t; + +always_inline uword +ip_vxlan_gbp_bypass_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, 
+ vlib_frame_t * frame, u32 is_ip4) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + u32 *from, *to_next, n_left_from, n_left_to_next, next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */ + ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */ + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + if (is_ip4) + addr4.data_u32 = ~0; + else + ip6_address_set_zero (&addr6); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *b0, *b1; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + udp_header_t *udp0, *udp1; + u32 bi0, ip_len0, udp_len0, flags0, next0; + u32 bi1, ip_len1, udp_len1, flags1, next1; + i32 len_diff0, len_diff1; + u8 error0, good_udp0, proto0; + u8 error1, good_udp1, proto1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + ip41 = vlib_buffer_get_current (b1); + } + else + { + ip60 = vlib_buffer_get_current (b0); + ip61 = vlib_buffer_get_current (b1); + } + + /* Setup packet for next IP feature */ + vnet_feature_next (&next0, b0); + vnet_feature_next (&next1, b1); + + if (is_ip4) + { + /* Treat IP frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol; + proto1 = ip4_is_fragment (ip41) ? 
0xfe : ip41->protocol; + } + else + { + proto0 = ip60->protocol; + proto1 = ip61->protocol; + } + + /* Process packet 0 */ + if (proto0 != IP_PROTOCOL_UDP) + goto exit0; /* not UDP packet */ + + if (is_ip4) + udp0 = ip4_next_header (ip40); + else + udp0 = ip6_next_header (ip60); + + if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp)) + goto exit0; /* not VXLAN_GBP packet */ + + /* Validate DIP against VTEPs */ + if (is_ip4) + { + if (addr4.as_u32 != ip40->dst_address.as_u32) + { + if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32)) + goto exit0; /* no local VTEP for VXLAN_GBP packet */ + addr4 = ip40->dst_address; + } + } + else + { + if (!ip6_address_is_equal (&addr6, &ip60->dst_address)) + { + if (!hash_get_mem (vxm->vtep6, &ip60->dst_address)) + goto exit0; /* no local VTEP for VXLAN_GBP packet */ + addr6 = ip60->dst_address; + } + } + + flags0 = b0->flags; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_udp0 |= udp0->checksum == 0; + + /* Verify UDP length */ + if (is_ip4) + ip_len0 = clib_net_to_host_u16 (ip40->length); + else + ip_len0 = clib_net_to_host_u16 (ip60->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + + /* Verify UDP checksum */ + if (PREDICT_FALSE (!good_udp0)) + { + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) + { + if (is_ip4) + flags0 = ip4_tcp_udp_validate_checksum (vm, b0); + else + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); + good_udp0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + } + + if (is_ip4) + { + error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; + } + else + { + error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; + } + + next0 = error0 ? 
+ IP_VXLAN_GBP_BYPASS_NEXT_DROP : + IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP; + b0->error = error0 ? error_node->errors[error0] : 0; + + /* vxlan-gbp-input node expect current at VXLAN_GBP header */ + if (is_ip4) + vlib_buffer_advance (b0, + sizeof (ip4_header_t) + + sizeof (udp_header_t)); + else + vlib_buffer_advance (b0, + sizeof (ip6_header_t) + + sizeof (udp_header_t)); + + exit0: + /* Process packet 1 */ + if (proto1 != IP_PROTOCOL_UDP) + goto exit1; /* not UDP packet */ + + if (is_ip4) + udp1 = ip4_next_header (ip41); + else + udp1 = ip6_next_header (ip61); + + if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp)) + goto exit1; /* not VXLAN_GBP packet */ + + /* Validate DIP against VTEPs */ + if (is_ip4) + { + if (addr4.as_u32 != ip41->dst_address.as_u32) + { + if (!hash_get (vxm->vtep4, ip41->dst_address.as_u32)) + goto exit1; /* no local VTEP for VXLAN_GBP packet */ + addr4 = ip41->dst_address; + } + } + else + { + if (!ip6_address_is_equal (&addr6, &ip61->dst_address)) + { + if (!hash_get_mem (vxm->vtep6, &ip61->dst_address)) + goto exit1; /* no local VTEP for VXLAN_GBP packet */ + addr6 = ip61->dst_address; + } + } + + flags1 = b1->flags; + good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_udp1 |= udp1->checksum == 0; + + /* Verify UDP length */ + if (is_ip4) + ip_len1 = clib_net_to_host_u16 (ip41->length); + else + ip_len1 = clib_net_to_host_u16 (ip61->payload_length); + udp_len1 = clib_net_to_host_u16 (udp1->length); + len_diff1 = ip_len1 - udp_len1; + + /* Verify UDP checksum */ + if (PREDICT_FALSE (!good_udp1)) + { + if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) + { + if (is_ip4) + flags1 = ip4_tcp_udp_validate_checksum (vm, b1); + else + flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1); + good_udp1 = + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + } + + if (is_ip4) + { + error1 = good_udp1 ? 
0 : IP4_ERROR_UDP_CHECKSUM; + error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH; + } + else + { + error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM; + error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH; + } + + next1 = error1 ? + IP_VXLAN_GBP_BYPASS_NEXT_DROP : + IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP; + b1->error = error1 ? error_node->errors[error1] : 0; + + /* vxlan_gbp-input node expect current at VXLAN_GBP header */ + if (is_ip4) + vlib_buffer_advance (b1, + sizeof (ip4_header_t) + + sizeof (udp_header_t)); + else + vlib_buffer_advance (b1, + sizeof (ip6_header_t) + + sizeof (udp_header_t)); + + exit1: + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + ip4_header_t *ip40; + ip6_header_t *ip60; + udp_header_t *udp0; + u32 bi0, ip_len0, udp_len0, flags0, next0; + i32 len_diff0; + u8 error0, good_udp0, proto0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + if (is_ip4) + ip40 = vlib_buffer_get_current (b0); + else + ip60 = vlib_buffer_get_current (b0); + + /* Setup packet for next IP feature */ + vnet_feature_next (&next0, b0); + + if (is_ip4) + /* Treat IP4 frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment (ip40) ? 
0xfe : ip40->protocol; + else + proto0 = ip60->protocol; + + if (proto0 != IP_PROTOCOL_UDP) + goto exit; /* not UDP packet */ + + if (is_ip4) + udp0 = ip4_next_header (ip40); + else + udp0 = ip6_next_header (ip60); + + if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp)) + goto exit; /* not VXLAN_GBP packet */ + + /* Validate DIP against VTEPs */ + if (is_ip4) + { + if (addr4.as_u32 != ip40->dst_address.as_u32) + { + if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32)) + goto exit; /* no local VTEP for VXLAN_GBP packet */ + addr4 = ip40->dst_address; + } + } + else + { + if (!ip6_address_is_equal (&addr6, &ip60->dst_address)) + { + if (!hash_get_mem (vxm->vtep6, &ip60->dst_address)) + goto exit; /* no local VTEP for VXLAN_GBP packet */ + addr6 = ip60->dst_address; + } + } + + flags0 = b0->flags; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_udp0 |= udp0->checksum == 0; + + /* Verify UDP length */ + if (is_ip4) + ip_len0 = clib_net_to_host_u16 (ip40->length); + else + ip_len0 = clib_net_to_host_u16 (ip60->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + + /* Verify UDP checksum */ + if (PREDICT_FALSE (!good_udp0)) + { + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) + { + if (is_ip4) + flags0 = ip4_tcp_udp_validate_checksum (vm, b0); + else + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); + good_udp0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + } + + if (is_ip4) + { + error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; + } + else + { + error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; + } + + next0 = error0 ? + IP_VXLAN_GBP_BYPASS_NEXT_DROP : + IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP; + b0->error = error0 ? 
error_node->errors[error0] : 0; + + /* vxlan_gbp-input node expect current at VXLAN_GBP header */ + if (is_ip4) + vlib_buffer_advance (b0, + sizeof (ip4_header_t) + + sizeof (udp_header_t)); + else + vlib_buffer_advance (b0, + sizeof (ip6_header_t) + + sizeof (udp_header_t)); + + exit: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip4_vxlan_gbp_bypass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_vxlan_gbp_bypass_node) = +{ + .function = ip4_vxlan_gbp_bypass, + .name = "ip4-vxlan-gbp-bypass", + .vector_size = sizeof (u32), + .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT, + .next_nodes = { + [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop", + [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan4-gbp-input", + }, + .format_buffer = format_ip4_header, + .format_trace = format_ip4_forward_next_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_vxlan_gbp_bypass_node, ip4_vxlan_gbp_bypass) +/* *INDENT-ON* */ + +/* Dummy init function to get us linked in. 
*/
+clib_error_t *
+ip4_vxlan_gbp_bypass_init (vlib_main_t * vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_vxlan_gbp_bypass_init);
+/*
+ * Node function of "ip6-vxlan-gbp-bypass": forwards the frame to the
+ * shared bypass routine with is_ip4 = 0 and returns its result.
+ */
+static uword
+ip6_vxlan_gbp_bypass (vlib_main_t * vm,
+                      vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+/*
+ * Graph node registration: two next nodes, "error-drop" and
+ * "vxlan6-gbp-input"; buffers and traces are printed with the IPv6
+ * formatters.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_vxlan_gbp_bypass_node) =
+{
+  .function = ip6_vxlan_gbp_bypass,
+  .name = "ip6-vxlan-gbp-bypass",
+  .vector_size = sizeof (u32),
+  .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT,
+  .next_nodes = {
+    [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop",
+    [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan6-gbp-input",
+  },
+  .format_buffer = format_ip6_header,
+  .format_trace = format_ip6_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_vxlan_gbp_bypass_node, ip6_vxlan_gbp_bypass)
+/* *INDENT-ON* */
+
+/* Dummy init function to get us linked in; does nothing and returns 0. */
+clib_error_t *
+ip6_vxlan_gbp_bypass_init (vlib_main_t * vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_vxlan_gbp_bypass_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan-gbp/dir.dox b/src/vnet/vxlan-gbp/dir.dox
new file mode 100644
index 00000000000..6e63c90b17b
--- /dev/null
+++ b/src/vnet/vxlan-gbp/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief VXLAN-GBP Code.
+
+This directory contains source code to support VXLAN-GBP.
+
+*/
+/*? %%clicmd:group_label VXLAN-GBP CLI %% ?*/
diff --git a/src/vnet/vxlan-gbp/encap.c b/src/vnet/vxlan-gbp/encap.c
new file mode 100644
index 00000000000..07142c8a521
--- /dev/null
+++ b/src/vnet/vxlan-gbp/encap.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gbp/vxlan_gbp.h>
+#include <vnet/qos/qos_types.h>
+#include <vnet/adj/rewrite.h>
+
+/* Statistics (not all errors): one X-macro entry per node counter */
+#define foreach_vxlan_gbp_encap_error    \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_gbp_encap_error_strings[] = {
+#define _(sym,string) string,
+  foreach_vxlan_gbp_encap_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GBP_ENCAP_ERROR_##sym,
+  foreach_vxlan_gbp_encap_error
+#undef _
+    VXLAN_GBP_ENCAP_N_ERROR,
+} vxlan_gbp_encap_error_t;
+
+typedef enum
+{
+  VXLAN_GBP_ENCAP_NEXT_DROP,
+  VXLAN_GBP_ENCAP_N_NEXT,
+} vxlan_gbp_encap_next_t;
+
+typedef struct
+{
+  u32 tunnel_index;
+  u32 vni;
+  u16 sclass;
+} vxlan_gbp_encap_trace_t;
+/*
+ * Trace formatter: consumes the vlib_main_t and vlib_node_t arguments
+ * (unused), then appends tunnel index, VNI and sclass of the
+ * vxlan_gbp_encap_trace_t record to s and returns s.
+ */
+u8 *
+format_vxlan_gbp_encap_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  vxlan_gbp_encap_trace_t *t = va_arg (*args, vxlan_gbp_encap_trace_t *);
+
+  s = format (s, "VXLAN_GBP encap to vxlan_gbp_tunnel%d vni %d sclass %d",
+              t->tunnel_index, t->vni, t->sclass);
+  return s;
+}
+/*
+ * Shared encap worker behind vxlan4_gbp_encap / vxlan6_gbp_encap.
+ *
+ * For each buffer of from_frame: finds the tunnel from the buffer's TX
+ * sw_if_index, prepends the tunnel's prebuilt underlay header (outer IP,
+ * UDP, VXLAN-GBP) by memcpy, then patches the outer length fields, the UDP
+ * source port (L2 flow hash), the GBP flags and the source class taken
+ * from the buffer metadata.  The buffer is enqueued to the next node and
+ * adjacency recorded in the tunnel's next_dpo, and the per-interface TX
+ * counter is incremented.
+ *
+ * is_ip4       non-zero for an IPv4 underlay, zero for IPv6.
+ * csum_offload non-zero: only set the offload flags and header offsets;
+ *              zero: fix the checksums in software (see below).
+ *
+ * Returns from_frame->n_vectors.
+ */
+always_inline uword
+vxlan_gbp_encap_inline (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame, u8 is_ip4, u8 csum_offload)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
+  vnet_main_t *vnm = vxm->vnet_main;
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vlib_combined_counter_main_t *tx_counter =
+    im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
+  u32 pkts_encapsulated = 0;
+  u32 thread_index = vlib_get_thread_index ();
+  u32 sw_if_index0 = 0, sw_if_index1 = 0;
+  u32 next0 = 0, next1 = 0;
+  vxlan_gbp_tunnel_t *t0 = NULL, *t1 = NULL;
+  index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  STATIC_ASSERT_SIZEOF (ip6_vxlan_gbp_header_t, 56);
+  STATIC_ASSERT_SIZEOF (ip4_vxlan_gbp_header_t, 36);
+
+  u8 const underlay_hdr_len = is_ip4 ?
+    sizeof (ip4_vxlan_gbp_header_t) : sizeof (ip6_vxlan_gbp_header_t);
+  u8 const rw_hdr_offset = sizeof t0->rewrite_data - underlay_hdr_len;
+  u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
+  u32 const csum_flags = is_ip4 ?
+    VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4 |
+    VNET_BUFFER_F_OFFLOAD_UDP_CKSUM : VNET_BUFFER_F_OFFLOAD_UDP_CKSUM;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+          }
+
+          u32 bi0 = to_next[0] = from[0];
+          u32 bi1 = to_next[1] = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+          n_left_from -= 2;
+
+          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+          vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
+          u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
+          u32 flow_hash1 = vnet_l2_compute_flow_hash (b1);
+
+          /* Get next node index and adj index from tunnel next_dpo.
+           * The lookup is cached across packets and redone only when the
+           * TX sw_if_index changes.
+           * NOTE(review): the cache starts at sw_if_index 0 with t0 == NULL,
+           * which presumably relies on index 0 never being a tunnel
+           * interface - confirm. */
+          if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+            {
+              sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+              vnet_hw_interface_t *hi0 =
+                vnet_get_sup_hw_interface (vnm, sw_if_index0);
+              t0 = &vxm->tunnels[hi0->dev_instance];
+              /* Note: change to always set next0 if it may set to drop */
+              next0 = t0->next_dpo.dpoi_next_node;
+              dpoi_idx0 = t0->next_dpo.dpoi_index;
+            }
+
+          /* Get next node index and adj index from tunnel next_dpo;
+           * reuse packet 0's result when both packets share the interface */
+          if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
+            {
+              if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
+                {
+                  sw_if_index1 = sw_if_index0;
+                  t1 = t0;
+                  next1 = next0;
+                  dpoi_idx1 = dpoi_idx0;
+                }
+              else
+                {
+                  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+                  vnet_hw_interface_t *hi1 =
+                    vnet_get_sup_hw_interface (vnm, sw_if_index1);
+                  t1 = &vxm->tunnels[hi1->dev_instance];
+                  /* Note: change to always set next1 if it may set to drop */
+                  next1 = t1->next_dpo.dpoi_next_node;
+                  dpoi_idx1 = t1->next_dpo.dpoi_index;
+                }
+            }
+
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
+          vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpoi_idx1;
+
+          ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
+          ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
+
+          vlib_buffer_advance (b0, -underlay_hdr_len);
+          vlib_buffer_advance (b1, -underlay_hdr_len);
+
+          u32 len0 = vlib_buffer_length_in_chain (vm, b0);
+          u32 len1 = vlib_buffer_length_in_chain (vm, b1);
+          u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
+          u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
+
+          void *underlay0 = vlib_buffer_get_current (b0);
+          void *underlay1 = vlib_buffer_get_current (b1);
+
+          /* vnet_rewrite_two_header writes only in (uword) 8 bytes chunks
+           * and discards the first 4 bytes of the (36 bytes ip4 underlay) rewrite
+           * use memcpy as a workaround */
+          clib_memcpy (underlay0, t0->rewrite_header.data + rw_hdr_offset,
+                       underlay_hdr_len);
+          clib_memcpy (underlay1, t1->rewrite_header.data + rw_hdr_offset,
+                       underlay_hdr_len);
+
+          ip4_header_t *ip4_0, *ip4_1;
+          qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
+          ip6_header_t *ip6_0, *ip6_1;
+          udp_header_t *udp0, *udp1;
+          vxlan_gbp_header_t *vxlan_gbp0, *vxlan_gbp1;
+          u8 *l3_0, *l3_1;
+          if (is_ip4)
+            {
+              ip4_vxlan_gbp_header_t *hdr0 = underlay0;
+              ip4_vxlan_gbp_header_t *hdr1 = underlay1;
+
+              /* Set the IP4 total length (and TOS when QoS data is valid);
+               * the header checksum is patched further below */
+              ip4_0 = &hdr0->ip4;
+              ip4_1 = &hdr1->ip4;
+              ip4_0->length = clib_host_to_net_u16 (len0);
+              ip4_1->length = clib_host_to_net_u16 (len1);
+
+              if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+                {
+                  ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
+                  ip4_0->tos = ip4_0_tos;
+                }
+              if (PREDICT_FALSE (b1->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+                {
+                  ip4_1_tos = vnet_buffer2 (b1)->qos.bits;
+                  ip4_1->tos = ip4_1_tos;
+                }
+
+              l3_0 = (u8 *) ip4_0;
+              l3_1 = (u8 *) ip4_1;
+              udp0 = &hdr0->udp;
+              udp1 = &hdr1->udp;
+              vxlan_gbp0 = &hdr0->vxlan_gbp;
+              vxlan_gbp1 = &hdr1->vxlan_gbp;
+            }
+          else                  /* ipv6 */
+            {
+              ip6_vxlan_gbp_header_t *hdr0 = underlay0;
+              ip6_vxlan_gbp_header_t *hdr1 = underlay1;
+
+              /* Fix IP6 payload length */
+              ip6_0 = &hdr0->ip6;
+              ip6_1 = &hdr1->ip6;
+              ip6_0->payload_length = payload_l0;
+              ip6_1->payload_length = payload_l1;
+
+              l3_0 = (u8 *) ip6_0;
+              l3_1 = (u8 *) ip6_1;
+              udp0 = &hdr0->udp;
+              udp1 = &hdr1->udp;
+              vxlan_gbp0 = &hdr0->vxlan_gbp;
+              vxlan_gbp1 = &hdr1->vxlan_gbp;
+            }
+
+          /* Fix UDP length and set source port */
+          udp0->length = payload_l0;
+          udp0->src_port = flow_hash0;
+          udp1->length = payload_l1;
+          udp1->src_port = flow_hash1;
+
+          /* set source class and gpflags */
+          vxlan_gbp0->gpflags = vnet_buffer2 (b0)->gbp.flags;
+          vxlan_gbp1->gpflags = vnet_buffer2 (b1)->gbp.flags;
+          vxlan_gbp0->sclass =
+            clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.src_epg);
+          vxlan_gbp1->sclass =
+            clib_host_to_net_u16 (vnet_buffer2 (b1)->gbp.src_epg);
+
+          if (csum_offload)
+            {
+              b0->flags |= csum_flags;
+              vnet_buffer (b0)->l3_hdr_offset = l3_0 - b0->data;
+              vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+              b1->flags |= csum_flags;
+              vnet_buffer (b1)->l3_hdr_offset = l3_1 - b1->data;
+              vnet_buffer (b1)->l4_hdr_offset = (u8 *) udp1 - b1->data;
+            }
+          /* IPv4 without checksum offload: incrementally patch the outer
+           * IP header checksum for the length (and TOS) written above;
+           * the UDP checksum is not touched on this path */
+          else if (is_ip4)
+            {
+              ip_csum_t sum0 = ip4_0->checksum;
+              sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
+                                     length /* changed member */ );
+              if (PREDICT_FALSE (ip4_0_tos))
+                {
+                  sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
+                                         tos /* changed member */ );
+                }
+              ip4_0->checksum = ip_csum_fold (sum0);
+              ip_csum_t sum1 = ip4_1->checksum;
+              sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
+                                     length /* changed member */ );
+              if (PREDICT_FALSE (ip4_1_tos))
+                {
+                  sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
+                                         tos /* changed member */ );
+                }
+              ip4_1->checksum = ip_csum_fold (sum1);
+            }
+          /* IPv6 UDP checksum is mandatory: compute it in software and
+           * send 0xffff instead of a computed zero */
+          else
+            {
+              int bogus = 0;
+
+              udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
+                (vm, b0, ip6_0, &bogus);
+              ASSERT (bogus == 0);
+              if (udp0->checksum == 0)
+                udp0->checksum = 0xffff;
+              udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
+                (vm, b1, ip6_1, &bogus);
+              ASSERT (bogus == 0);
+              if (udp1->checksum == 0)
+                udp1->checksum = 0xffff;
+            }
+
+          vlib_increment_combined_counter (tx_counter, thread_index,
+                                           sw_if_index0, 1, len0);
+          vlib_increment_combined_counter (tx_counter, thread_index,
+                                           sw_if_index1, 1, len1);
+          pkts_encapsulated += 2;
+
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              vxlan_gbp_encap_trace_t *tr =
+                vlib_add_trace (vm, node, b0, sizeof (*tr));
+              tr->tunnel_index = t0 - vxm->tunnels;
+              tr->vni = t0->vni;
+              tr->sclass = vnet_buffer2 (b0)->gbp.src_epg;
+            }
+
+          if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              vxlan_gbp_encap_trace_t *tr =
+                vlib_add_trace (vm, node, b1, sizeof (*tr));
+              tr->tunnel_index = t1 - vxm->tunnels;
+              tr->vni = t1->vni;
+              tr->sclass = vnet_buffer2 (b1)->gbp.src_epg;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0 = to_next[0] = from[0];
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+          u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
+
+          /* Get next node index and adj index from tunnel next_dpo */
+          if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+            {
+              sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+              vnet_hw_interface_t *hi0 =
+                vnet_get_sup_hw_interface (vnm, sw_if_index0);
+              t0 = &vxm->tunnels[hi0->dev_instance];
+              /* Note: change to always set next0 if it may be set to drop */
+              next0 = t0->next_dpo.dpoi_next_node;
+              dpoi_idx0 = t0->next_dpo.dpoi_index;
+            }
+          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
+
+          ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
+
+          vlib_buffer_advance (b0, -underlay_hdr_len);
+          void *underlay0 = vlib_buffer_get_current (b0);
+
+          /* vnet_rewrite_one_header writes only in (uword) 8 bytes chunks
+           * and discards the first 4 bytes of the (36 bytes ip4 underlay) rewrite
+           * use memcpy as a workaround */
+          clib_memcpy (underlay0, t0->rewrite_header.data + rw_hdr_offset,
+                       underlay_hdr_len);
+
+          u32 len0 = vlib_buffer_length_in_chain (vm, b0);
+          u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
+
+          vxlan_gbp_header_t *vxlan_gbp0;
+          udp_header_t *udp0;
+          ip4_header_t *ip4_0;
+          qos_bits_t ip4_0_tos = 0;
+          ip6_header_t *ip6_0;
+          u8 *l3_0;
+          if (is_ip4)
+            {
+              ip4_vxlan_gbp_header_t *hdr = underlay0;
+
+              /* Set the IP4 total length (and TOS when QoS data is valid);
+               * the header checksum is patched further below */
+              ip4_0 = &hdr->ip4;
+              ip4_0->length = clib_host_to_net_u16 (len0);
+
+              if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+                {
+                  ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
+                  ip4_0->tos = ip4_0_tos;
+                }
+
+              l3_0 = (u8 *) ip4_0;
+              udp0 = &hdr->udp;
+              vxlan_gbp0 = &hdr->vxlan_gbp;
+            }
+          else                  /* ip6 path */
+            {
+              ip6_vxlan_gbp_header_t *hdr = underlay0;
+
+              /* Fix IP6 payload length */
+              ip6_0 = &hdr->ip6;
+              ip6_0->payload_length = payload_l0;
+
+              l3_0 = (u8 *) ip6_0;
+              udp0 = &hdr->udp;
+              vxlan_gbp0 = &hdr->vxlan_gbp;
+            }
+
+          /* Fix UDP length and set source port */
+          udp0->length = payload_l0;
+          udp0->src_port = flow_hash0;
+
+          /* set source class and gpflags */
+          vxlan_gbp0->gpflags = vnet_buffer2 (b0)->gbp.flags;
+          vxlan_gbp0->sclass =
+            clib_host_to_net_u16 (vnet_buffer2 (b0)->gbp.src_epg);
+
+          if (csum_offload)
+            {
+              b0->flags |= csum_flags;
+              vnet_buffer (b0)->l3_hdr_offset = l3_0 - b0->data;
+              vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+            }
+          /* IPv4 without checksum offload: incrementally patch the outer
+           * IP header checksum for the length (and TOS) written above;
+           * the UDP checksum is not touched on this path */
+          else if (is_ip4)
+            {
+              ip_csum_t sum0 = ip4_0->checksum;
+              sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
+                                     length /* changed member */ );
+              if (PREDICT_FALSE (ip4_0_tos))
+                {
+                  sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
+                                         tos /* changed member */ );
+                }
+              ip4_0->checksum = ip_csum_fold (sum0);
+            }
+          /* IPv6 UDP checksum is mandatory: compute it in software and
+           * send 0xffff instead of a computed zero */
+          else
+            {
+              int bogus = 0;
+
+              udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
+                (vm, b0, ip6_0, &bogus);
+              ASSERT (bogus == 0);
+              if (udp0->checksum == 0)
+                udp0->checksum = 0xffff;
+            }
+
+          vlib_increment_combined_counter (tx_counter, thread_index,
+                                           sw_if_index0, 1, len0);
+          pkts_encapsulated++;
+
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              vxlan_gbp_encap_trace_t *tr =
+                vlib_add_trace (vm, node, b0, sizeof (*tr));
+              tr->tunnel_index = t0 - vxm->tunnels;
+              tr->vni = t0->vni;
+              tr->sclass = vnet_buffer2 (b0)->gbp.src_epg;
+            }
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Do we still need this now that tunnel tx stats is kept? */
+  vlib_node_increment_counter (vm, node->node_index,
+                               VXLAN_GBP_ENCAP_ERROR_ENCAPSULATED,
+                               pkts_encapsulated);
+
+  return from_frame->n_vectors;
+}
+
+static uword
+vxlan4_gbp_encap (vlib_main_t * vm,
+                  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  /* Disable chksum offload as setup overhead in tx node is not worthwhile
+     for ip4 header checksum only, unless udp checksum is also required */
+  return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 1,
+                                 /* csum_offload */ 0);
+}
+
+static uword
+vxlan6_gbp_encap (vlib_main_t * vm,
+                  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  /* Enable checksum offload for ip6 as udp checksum is mandatory. */
+  return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 0,
+                                 /* csum_offload */ 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan4_gbp_encap_node) =
+{
+  .function = vxlan4_gbp_encap,
+  .name = "vxlan4-gbp-encap",
+  .vector_size = sizeof (u32),
+  .format_trace = format_vxlan_gbp_encap_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
+  .error_strings = vxlan_gbp_encap_error_strings,
+  .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
+  .next_nodes = {
+    [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_gbp_encap_node, vxlan4_gbp_encap)
+
+VLIB_REGISTER_NODE (vxlan6_gbp_encap_node) =
+{
+  .function = vxlan6_gbp_encap,
+  .name = "vxlan6-gbp-encap",
+  .vector_size = sizeof (u32),
+  .format_trace = format_vxlan_gbp_encap_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
+  .error_strings = vxlan_gbp_encap_error_strings,
+  .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
+  .next_nodes = {
+    [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_gbp_encap_node, vxlan6_gbp_encap)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.api b/src/vnet/vxlan-gbp/vxlan_gbp.api
new file mode 100644
index 00000000000..ecb5ba27f18
--- /dev/null
+++ b/src/vnet/vxlan-gbp/vxlan_gbp.api
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+/** \brief Create or delete a VXLAN-GBP tunnel
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - Use 1 to create the tunnel, 0 to remove it
+    @param is_ipv6 - Use 0 for IPv4, 1 for IPv6
+    @param instance - optional unique custom device instance, else ~0.
+ @param src_address - Source IP address + @param dst_address - Destination IP address, can be multicast + @param mcast_sw_if_index - Interface for multicast destination + @param encap_vrf_id - Encap route table FIB index + @param decap_next_index - Index of the decap next graph node + @param vni - The VXLAN Network Identifier, uint24 + @param sclass - The policy group id +*/ +define vxlan_gbp_add_del_tunnel +{ + u32 client_index; + u32 context; + u8 is_add; + u8 is_ipv6; + u32 instance; /* If non-~0, specifies a custom dev instance */ + u8 src_address[16]; + u8 dst_address[16]; + u32 mcast_sw_if_index; + u32 encap_vrf_id; + u32 decap_next_index; + u32 vni; + u16 sclass; +}; + +define vxlan_gbp_add_del_tunnel_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +define vxlan_gbp_tunnel_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +define vxlan_gbp_tunnel_details +{ + u32 context; + u32 sw_if_index; + u32 instance; + u8 src_address[16]; + u8 dst_address[16]; + u32 mcast_sw_if_index; + u32 encap_vrf_id; + u32 decap_next_index; + u32 vni; + u16 sclass; + u8 is_ipv6; +}; + +/** \brief Interface set vxlan-gbp-bypass request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface on which to enable or disable the bypass + @param is_ipv6 - if non-zero, select ip6-vxlan-gbp-bypass, else ip4-vxlan-gbp-bypass + @param enable - if non-zero enable, else disable +*/ +autoreply define sw_interface_set_vxlan_gbp_bypass +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_ipv6; + u8 enable; +}; + diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.c b/src/vnet/vxlan-gbp/vxlan_gbp.c new file mode 100644 index 00000000000..14b3e667dbf --- /dev/null +++ b/src/vnet/vxlan-gbp/vxlan_gbp.c @@ -0,0 +1,1109 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vxlan-gbp/vxlan_gbp.h> +#include <vnet/ip/format.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> +#include <vnet/mfib/mfib_table.h> +#include <vnet/adj/adj_mcast.h> +#include <vnet/adj/rewrite.h> +#include <vnet/interface.h> +#include <vlib/vlib.h> + +/** + * @file + * @brief VXLAN GBP. + * + * VXLAN GBP provides the features of vxlan and carry group policy id. + */ + + +vxlan_gbp_main_t vxlan_gbp_main; + +static u8 * +format_decap_next (u8 * s, va_list * args) +{ + u32 next_index = va_arg (*args, u32); + + if (next_index == VXLAN_GBP_INPUT_NEXT_DROP) + return format (s, "drop"); + else + return format (s, "index %d", next_index); + return s; +} + +u8 * +format_vxlan_gbp_tunnel (u8 * s, va_list * args) +{ + vxlan_gbp_tunnel_t *t = va_arg (*args, vxlan_gbp_tunnel_t *); + + s = format (s, + "[%d] instance %d src %U dst %U vni %d fib-idx %d" + " sw-if-idx %d ", + t->dev_instance, t->user_instance, + format_ip46_address, &t->src, IP46_TYPE_ANY, + format_ip46_address, &t->dst, IP46_TYPE_ANY, + t->vni, t->encap_fib_index, t->sw_if_index); + + s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index); + + if (PREDICT_FALSE (t->decap_next_index != VXLAN_GBP_INPUT_NEXT_L2_INPUT)) + s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index); + + if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst))) + s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index); + 
+ return s; +} + +static u8 * +format_vxlan_gbp_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vxlan_gbp_tunnel_t *t; + + if (dev_instance == ~0) + return format (s, "<cached-unused>"); + + if (dev_instance >= vec_len (vxm->tunnels)) + return format (s, "<improperly-referenced>"); + + t = pool_elt_at_index (vxm->tunnels, dev_instance); + + return format (s, "vxlan_gbp_tunnel%d", t->user_instance); +} + +static clib_error_t * +vxlan_gbp_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, + u32 flags) +{ + u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? + VNET_HW_INTERFACE_FLAG_LINK_UP : 0; + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return /* no error */ 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (vxlan_gbp_device_class, static) = { + .name = "VXLAN-GBP", + .format_device_name = format_vxlan_gbp_name, + .format_tx_trace = format_vxlan_gbp_encap_trace, + .admin_up_down_function = vxlan_gbp_interface_admin_up_down, +}; +/* *INDENT-ON* */ + +static u8 * +format_vxlan_gbp_header_with_length (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + s = format (s, "unimplemented dev %u", dev_instance); + return s; +} + +/* *INDENT-OFF* */ +VNET_HW_INTERFACE_CLASS (vxlan_gbp_hw_class) = { + .name = "VXLAN-GBP", + .format_header = format_vxlan_gbp_header_with_length, + .build_rewrite = default_build_rewrite, +}; +/* *INDENT-ON* */ + +static void +vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_t * t) +{ + u8 is_ip4 = ip46_address_is_ip4 (&t->dst); + dpo_id_t dpo = DPO_INVALID; + fib_forward_chain_type_t forw_type = is_ip4 ? 
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6; + + fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo); + + /* vxlan_gbp uses the payload hash as the udp source port + * hence the packet's hash is unknown + * skip single bucket load balance dpo's */ + while (DPO_LOAD_BALANCE == dpo.dpoi_type) + { + load_balance_t *lb = load_balance_get (dpo.dpoi_index); + if (lb->lb_n_buckets > 1) + break; + + dpo_copy (&dpo, load_balance_get_bucket_i (lb, 0)); + } + + u32 encap_index = is_ip4 ? + vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index; + dpo_stack_from_node (encap_index, &t->next_dpo, &dpo); + dpo_reset (&dpo); +} + +static vxlan_gbp_tunnel_t * +vxlan_gbp_tunnel_from_fib_node (fib_node_t * node) +{ + ASSERT (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL == node->fn_type); + return ((vxlan_gbp_tunnel_t *) (((char *) node) - + STRUCT_OFFSET_OF (vxlan_gbp_tunnel_t, + node))); +} + +/** + * Function definition to backwalk a FIB node - + * Here we will restack the new dpo of VXLAN DIP to encap node. + */ +static fib_node_back_walk_rc_t +vxlan_gbp_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_from_fib_node (node)); + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +vxlan_gbp_tunnel_fib_node_get (fib_node_index_t index) +{ + vxlan_gbp_tunnel_t *t; + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + + t = pool_elt_at_index (vxm->tunnels, index); + + return (&t->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +vxlan_gbp_tunnel_last_lock_gone (fib_node_t * node) +{ + /* + * The VXLAN GBP tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT (0); +} + +/* + * Virtual function table registered by VXLAN GBP tunnels + * for participation in the FIB object graph. 
+ */ +const static fib_node_vft_t vxlan_gbp_vft = { + .fnv_get = vxlan_gbp_tunnel_fib_node_get, + .fnv_last_lock = vxlan_gbp_tunnel_last_lock_gone, + .fnv_back_walk = vxlan_gbp_tunnel_back_walk, +}; + + +#define foreach_copy_field \ +_(vni) \ +_(mcast_sw_if_index) \ +_(encap_fib_index) \ +_(decap_next_index) \ +_(src) \ +_(dst) + +static void +vxlan_gbp_rewrite (vxlan_gbp_tunnel_t * t, bool is_ip6) +{ + union + { + ip4_vxlan_gbp_header_t h4; + ip6_vxlan_gbp_header_t h6; + } h; + int len = is_ip6 ? sizeof h.h6 : sizeof h.h4; + + udp_header_t *udp; + vxlan_gbp_header_t *vxlan_gbp; + /* Fixed portion of the (outer) ip header */ + + memset (&h, 0, sizeof (h)); + if (!is_ip6) + { + ip4_header_t *ip = &h.h4.ip4; + udp = &h.h4.udp, vxlan_gbp = &h.h4.vxlan_gbp; + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + + ip->src_address = t->src.ip4; + ip->dst_address = t->dst.ip4; + + /* we fix up the ip4 header length and checksum after-the-fact */ + ip->checksum = ip4_header_checksum (ip); + } + else + { + ip6_header_t *ip = &h.h6.ip6; + udp = &h.h6.udp, vxlan_gbp = &h.h6.vxlan_gbp; + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (6 << 28); + ip->hop_limit = 255; + ip->protocol = IP_PROTOCOL_UDP; + + ip->src_address = t->src.ip6; + ip->dst_address = t->dst.ip6; + } + + /* UDP header, randomize src port on something, maybe? */ + udp->src_port = clib_host_to_net_u16 (47789); + udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp); + + /* VXLAN header */ + vxlan_gbp_set_header (vxlan_gbp, t->vni); + vnet_rewrite_set_data (*t, &h, len); +} + +static bool +vxlan_gbp_decap_next_is_valid (vxlan_gbp_main_t * vxm, u32 is_ip6, + u32 decap_next_index) +{ + vlib_main_t *vm = vxm->vlib_main; + u32 input_idx = (!is_ip6) ? 
+ vxlan4_gbp_input_node.index : vxlan6_gbp_input_node.index; + vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx); + + return decap_next_index < r->n_next_nodes; +} + +static uword +vtep_addr_ref (ip46_address_t * ip) +{ + uword *vtep = ip46_address_is_ip4 (ip) ? + hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) : + hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6); + if (vtep) + return ++(*vtep); + ip46_address_is_ip4 (ip) ? + hash_set (vxlan_gbp_main.vtep4, ip->ip4.as_u32, 1) : + hash_set_mem_alloc (&vxlan_gbp_main.vtep6, &ip->ip6, 1); + return 1; +} + +static uword +vtep_addr_unref (ip46_address_t * ip) +{ + uword *vtep = ip46_address_is_ip4 (ip) ? + hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) : + hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6); + ASSERT (vtep); + if (--(*vtep) != 0) + return *vtep; + ip46_address_is_ip4 (ip) ? + hash_unset (vxlan_gbp_main.vtep4, ip->ip4.as_u32) : + hash_unset_mem_free (&vxlan_gbp_main.vtep6, &ip->ip6); + return 0; +} + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(union +{ + struct + { + fib_node_index_t mfib_entry_index; + adj_index_t mcast_adj_index; + }; + u64 as_u64; +}) mcast_shared_t; +/* *INDENT-ON* */ + +static inline mcast_shared_t +mcast_shared_get (ip46_address_t * ip) +{ + ASSERT (ip46_address_is_multicast (ip)); + uword *p = hash_get_mem (vxlan_gbp_main.mcast_shared, ip); + ASSERT (p); + mcast_shared_t ret = {.as_u64 = *p }; + return ret; +} + +static inline void +mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai) +{ + mcast_shared_t new_ep = { + .mcast_adj_index = ai, + .mfib_entry_index = mfei, + }; + + hash_set_mem_alloc (&vxlan_gbp_main.mcast_shared, dst, new_ep.as_u64); +} + +static inline void +mcast_shared_remove (ip46_address_t * dst) +{ + mcast_shared_t ep = mcast_shared_get (dst); + + adj_unlock (ep.mcast_adj_index); + mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN_GBP); + + hash_unset_mem_free (&vxlan_gbp_main.mcast_shared, dst); +} + +int 
vnet_vxlan_gbp_add_del_tunnel + (vnet_vxlan_gbp_add_del_tunnel_args_t * a, u32 * sw_if_indexp) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vxlan_gbp_tunnel_t *t = 0; + vnet_main_t *vnm = vxm->vnet_main; + u64 *p; + u32 sw_if_index = ~0; + vxlan4_gbp_tunnel_key_t key4; + vxlan6_gbp_tunnel_key_t key6; + u32 is_ip6 = a->is_ip6; + + int not_found; + if (!is_ip6) + { + key4.key[0] = a->dst.ip4.as_u32; + key4.key[1] = (((u64) a->encap_fib_index) << 32) + | clib_host_to_net_u32 (a->vni << 8); + not_found = + clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, + &key4); + p = &key4.value; + } + else + { + key6.key[0] = a->dst.ip6.as_u64[0]; + key6.key[1] = a->dst.ip6.as_u64[1]; + key6.key[2] = (((u64) a->encap_fib_index) << 32) + | clib_host_to_net_u32 (a->vni << 8); + not_found = + clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, + &key6); + p = &key6.value; + } + + if (not_found) + p = 0; + + if (a->is_add) + { + l2input_main_t *l2im = &l2input_main; + u32 dev_instance; /* real dev instance tunnel index */ + u32 user_instance; /* request and actual instance number */ + + /* adding a tunnel: tunnel must not already exist */ + if (p) + return VNET_API_ERROR_TUNNEL_EXIST; + + /* if not set explicitly, default to l2 */ + if (a->decap_next_index == ~0) + a->decap_next_index = VXLAN_GBP_INPUT_NEXT_L2_INPUT; + if (!vxlan_gbp_decap_next_is_valid (vxm, is_ip6, a->decap_next_index)) + return VNET_API_ERROR_INVALID_DECAP_NEXT; + + pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + dev_instance = t - vxm->tunnels; + + /* copy from arg structure */ +#define _(x) t->x = a->x; + foreach_copy_field; +#undef _ + + vxlan_gbp_rewrite (t, is_ip6); + /* + * Reconcile the real dev_instance and a possible requested instance. 
+ */ + user_instance = a->instance; + if (user_instance == ~0) + user_instance = dev_instance; + if (hash_get (vxm->instance_used, user_instance)) + { + pool_put (vxm->tunnels, t); + return VNET_API_ERROR_INSTANCE_IN_USE; + } + hash_set (vxm->instance_used, user_instance, 1); + + t->dev_instance = dev_instance; /* actual */ + t->user_instance = user_instance; /* name */ + /* sclass is not part of foreach_copy_field, so carry it over explicitly */ + t->sclass = a->sclass; + + /* copy the key */ + int add_failed; + if (is_ip6) + { + key6.value = (u64) dev_instance; + add_failed = + clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6, + 1 /*add */ ); + } + else + { + key4.value = (u64) dev_instance; + add_failed = + clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4, + 1 /*add */ ); + } + + if (add_failed) + { + /* release the instance number reserved above before bailing out */ + hash_unset (vxm->instance_used, user_instance); + pool_put (vxm->tunnels, t); + return VNET_API_ERROR_INVALID_REGISTRATION; + } + + t->hw_if_index = vnet_register_interface + (vnm, vxlan_gbp_device_class.index, dev_instance, + vxlan_gbp_hw_class.index, dev_instance); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); + + /* Set vxlan_gbp tunnel output node */ + u32 encap_index = !is_ip6 ? 
+ vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index; + vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index); + + t->sw_if_index = sw_if_index = hi->sw_if_index; + + vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index, + ~0); + vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance; + + /* setup l2 input config with l2 feature and bd 0 to drop packet */ + vec_validate (l2im->configs, sw_if_index); + l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; + l2im->configs[sw_if_index].bd_index = 0; + + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN; + vnet_sw_interface_set_flags (vnm, sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_GBP_TUNNEL); + fib_prefix_t tun_dst_pfx; + vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL; + + fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx); + if (!ip46_address_is_multicast (&t->dst)) + { + /* Unicast tunnel - + * source the FIB entry for the tunnel's destination + * and become a child thereof. 
The tunnel will then get poked + * when the forwarding for the entry updates, and the tunnel can + * re-stack accordingly + */ + vtep_addr_ref (&t->src); + t->fib_entry_index = fib_table_entry_special_add + (t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE); + t->sibling_index = fib_entry_child_add + (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, + dev_instance); + vxlan_gbp_tunnel_restack_dpo (t); + } + else + { + /* Multicast tunnel - + * as the same mcast group can be used for multiple mcast tunnels + * with different VNIs, create the output fib adjacency only if + * it does not already exist + */ + fib_protocol_t fp = fib_ip_proto (is_ip6); + + /* a return of 1 means this is the first reference to the group */ + if (vtep_addr_ref (&t->dst) == 1) + { + fib_node_index_t mfei; + adj_index_t ai; + fib_route_path_t path = { + .frp_proto = fib_proto_to_dpo (fp), + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + const mfib_prefix_t mpfx = { + .fp_proto = fp, + .fp_len = (is_ip6 ? 
128 : 32), + .fp_grp_addr = tun_dst_pfx.fp_addr, + }; + + /* + * Setup the (*,G) to receive traffic on the mcast group + * - the forwarding interface is for-us + * - the accepting interface is that from the API + */ + mfib_table_entry_path_update (t->encap_fib_index, + &mpfx, + MFIB_SOURCE_VXLAN_GBP, + &path, MFIB_ITF_FLAG_FORWARD); + + path.frp_sw_if_index = a->mcast_sw_if_index; + path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE; + mfei = mfib_table_entry_path_update (t->encap_fib_index, + &mpfx, + MFIB_SOURCE_VXLAN_GBP, + &path, + MFIB_ITF_FLAG_ACCEPT); + + /* + * Create the mcast adjacency to send traffic to the group + */ + ai = adj_mcast_add_or_lock (fp, + fib_proto_to_link (fp), + a->mcast_sw_if_index); + + /* + * create a new end-point + */ + mcast_shared_add (&t->dst, mfei, ai); + } + + dpo_id_t dpo = DPO_INVALID; + mcast_shared_t ep = mcast_shared_get (&t->dst); + + /* Stack shared mcast dst mac addr rewrite on encap */ + dpo_set (&dpo, DPO_ADJACENCY_MCAST, + fib_proto_to_dpo (fp), ep.mcast_adj_index); + + dpo_stack_from_node (encap_index, &t->next_dpo, &dpo); + dpo_reset (&dpo); + flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER; + } + + vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class = + flood_class; + } + else + { + /* deleting a tunnel: tunnel must exist */ + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + u32 instance = p[0]; + t = pool_elt_at_index (vxm->tunnels, instance); + + sw_if_index = t->sw_if_index; + vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); + + vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; + + if (!is_ip6) + clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4, + 0 /*del */ ); + else + clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6, + 0 /*del */ ); + + if (!ip46_address_is_multicast (&t->dst)) + { + vtep_addr_unref (&t->src); + fib_entry_child_remove (t->fib_entry_index, t->sibling_index); + fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); + } + else if 
(vtep_addr_unref (&t->dst) == 0) + { + mcast_shared_remove (&t->dst); + } + + vnet_delete_hw_interface (vnm, t->hw_if_index); + hash_unset (vxm->instance_used, t->user_instance); + + fib_node_deinit (&t->node); + pool_put (vxm->tunnels, t); + } + + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; + + return 0; +} + +static uword +get_decap_next_for_node (u32 node_index, u32 ipv4_set) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vlib_main_t *vm = vxm->vlib_main; + uword input_node = (ipv4_set) ? vxlan4_gbp_input_node.index : + vxlan6_gbp_input_node.index; + + return vlib_node_add_next (vm, input_node, node_index); +} + +static uword +unformat_decap_next (unformat_input_t * input, va_list * args) +{ + u32 *result = va_arg (*args, u32 *); + u32 ipv4_set = va_arg (*args, int); + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vlib_main_t *vm = vxm->vlib_main; + u32 node_index; + u32 tmp; + + if (unformat (input, "l2")) + *result = VXLAN_GBP_INPUT_NEXT_L2_INPUT; + else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index)) + *result = get_decap_next_for_node (node_index, ipv4_set); + else if (unformat (input, "%d", &tmp)) + *result = tmp; + else + return 0; + return 1; +} + +static clib_error_t * +vxlan_gbp_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip46_address_t src = ip46_address_initializer, dst = + ip46_address_initializer; + u8 is_add = 1; + u8 src_set = 0; + u8 dst_set = 0; + u8 grp_set = 0; + u8 ipv4_set = 0; + u8 ipv6_set = 0; + u32 instance = ~0; + u32 encap_fib_index = 0; + u32 mcast_sw_if_index = ~0; + u32 decap_next_index = VXLAN_GBP_INPUT_NEXT_L2_INPUT; + u32 vni = 0; + u32 table_id; + clib_error_t *parse_error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "del")) + { + is_add = 0; + } + else if (unformat (line_input, "instance %d", &instance)) + ; + else if (unformat (line_input, "src %U", + unformat_ip46_address, &src, IP46_TYPE_ANY)) + { + src_set = 1; + ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1); + } + else if (unformat (line_input, "dst %U", + unformat_ip46_address, &dst, IP46_TYPE_ANY)) + { + dst_set = 1; + ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1); + } + else if (unformat (line_input, "group %U %U", + unformat_ip46_address, &dst, IP46_TYPE_ANY, + unformat_vnet_sw_interface, + vnet_get_main (), &mcast_sw_if_index)) + { + grp_set = dst_set = 1; + ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1); + } + else if (unformat (line_input, "encap-vrf-id %d", &table_id)) + { + encap_fib_index = + fib_table_find (fib_ip_proto (ipv6_set), table_id); + } + else if (unformat (line_input, "decap-next %U", unformat_decap_next, + &decap_next_index, ipv4_set)) + ; + else if (unformat (line_input, "vni %d", &vni)) + ; + else + { + parse_error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + break; + } + } + + unformat_free (line_input); + + if (parse_error) + return parse_error; + + if (encap_fib_index == ~0) + return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id); + + if (src_set == 0) + return clib_error_return (0, "tunnel src address not specified"); + + if (dst_set == 0) + return clib_error_return (0, "tunnel dst address not specified"); + + if (grp_set && !ip46_address_is_multicast (&dst)) + return clib_error_return (0, "tunnel group address not multicast"); + + if (grp_set == 0 && ip46_address_is_multicast (&dst)) + return clib_error_return (0, "dst address must be unicast"); + + if (grp_set && mcast_sw_if_index == ~0) + return clib_error_return 
(0, "tunnel nonexistent multicast device"); + + if (ipv4_set && ipv6_set) + return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + + if (ip46_address_cmp (&src, &dst) == 0) + return clib_error_return (0, "src and dst addresses are identical"); + + if (decap_next_index == ~0) + return clib_error_return (0, "next node not found"); + + if (vni == 0) + return clib_error_return (0, "vni not specified"); + + if (vni >> 24) + return clib_error_return (0, "vni %d out of range", vni); + + vnet_vxlan_gbp_add_del_tunnel_args_t a = { + .is_add = is_add, + .is_ip6 = ipv6_set, + .instance = instance, +#define _(x) .x = x, + foreach_copy_field +#undef _ + }; + + u32 tunnel_sw_if_index; + int rv = vnet_vxlan_gbp_add_del_tunnel (&a, &tunnel_sw_if_index); + + switch (rv) + { + case 0: + if (is_add) + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, + vnet_get_main (), tunnel_sw_if_index); + break; + + case VNET_API_ERROR_TUNNEL_EXIST: + return clib_error_return (0, "tunnel already exists..."); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel does not exist..."); + + case VNET_API_ERROR_INSTANCE_IN_USE: + return clib_error_return (0, "Instance is in use"); + + default: + return clib_error_return + (0, "vnet_vxlan_gbp_add_del_tunnel returned %d", rv); + } + + return 0; +} + +/*? + * Add or delete a VXLAN Tunnel. + * + * VXLAN provides the features needed to allow L2 bridge domains (BDs) + * to span multiple servers. This is done by building an L2 overlay on + * top of an L3 network underlay using VXLAN tunnels. + * + * This makes it possible for servers to be co-located in the same data + * center or be separated geographically as long as they are reachable + * through the underlay L3 network. + * + * You can refer to this kind of L2 overlay bridge domain as a VXLAN + * (Virtual eXtensible VLAN) segment. 
+ * + * @cliexpar + * Example of how to create a VXLAN Tunnel: + * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7} + * Example of how to create a VXLAN Tunnel with a known name, vxlan_gbp_tunnel42: + * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 instance 42} + * Example of how to create a multicast VXLAN Tunnel with a known name, vxlan_gbp_tunnel23: + * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 group 239.1.1.1 GigabitEthernet0/8/0 vni 13 instance 23} + * Example of how to delete a VXLAN Tunnel: + * @cliexcmd{create vxlan-gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (create_vxlan_gbp_tunnel_command, static) = { + .path = "create vxlan-gbp tunnel", + .short_help = + "create vxlan-gbp tunnel src <local-vtep-addr>" + " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>" + " [instance <id>]" + " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del]", + .function = vxlan_gbp_add_del_tunnel_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_vxlan_gbp_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vxlan_gbp_tunnel_t *t; + int raw = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "raw")) + raw = 1; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, input); + } + + if (pool_elts (vxm->tunnels) == 0) + vlib_cli_output (vm, "No vxlan-gbp tunnels configured..."); + +/* *INDENT-OFF* */ + pool_foreach (t, vxm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_vxlan_gbp_tunnel, t); + })); +/* *INDENT-ON* */ + + if (raw) + { + vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n", + format_bihash_16_8, &vxm->vxlan4_gbp_tunnel_by_key, + 1 /* verbose */ ); + vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n", + format_bihash_24_8, &vxm->vxlan6_gbp_tunnel_by_key, + 1 /* verbose */ ); + } 
+ + return 0; +} + +/*? + * Display all the VXLAN Tunnel entries. + * + * @cliexpar + * Example of how to display the VXLAN Tunnel entries: + * @cliexstart{show vxlan_gbp tunnel} + * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2 + * @cliexend + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vxlan_gbp_tunnel_command, static) = { + .path = "show vxlan-gbp tunnel", + .short_help = "show vxlan-gbp tunnel [raw]", + .function = show_vxlan_gbp_tunnel_command_fn, +}; +/* *INDENT-ON* */ + + +void +vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable) +{ + if (is_ip6) + vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gbp-bypass", + sw_if_index, is_enable, 0, 0); + else + vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gbp-bypass", + sw_if_index, is_enable, 0, 0); +} + + +static clib_error_t * +set_ip_vxlan_gbp_bypass (u32 is_ip6, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, is_enable; + + sw_if_index = ~0; + is_enable = 1; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user + (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "del")) + is_enable = 0; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == sw_if_index) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, line_input); + goto done; + } + + vnet_int_vxlan_gbp_bypass_mode (sw_if_index, is_ip6, is_enable); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +set_ip4_vxlan_gbp_bypass (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + return set_ip_vxlan_gbp_bypass (0, input, cmd); +} + +/*? 
+ * This command adds the 'ip4-vxlan-gbp-bypass' graph node for a given interface. + * By adding the IPv4 vxlan-gbp-bypass graph node to an interface, the node checks + * for and validates input vxlan_gbp packets and bypasses the ip4-lookup, ip4-local, + * ip4-udp-lookup nodes to speed up vxlan_gbp packet forwarding. This node will + * cause extra overhead for non-vxlan_gbp packets, which is kept at a minimum. + * + * @cliexpar + * @parblock + * Example of graph node before ip4-vxlan-gbp-bypass is enabled: + * @cliexstart{show vlib graph ip4-vxlan-gbp-bypass} + * Name Next Previous + * ip4-vxlan-gbp-bypass error-drop [0] + * vxlan4-gbp-input [1] + * ip4-lookup [2] + * @cliexend + * + * Example of how to enable ip4-vxlan-gbp-bypass on an interface: + * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0} + * + * Example of graph node after ip4-vxlan-gbp-bypass is enabled: + * @cliexstart{show vlib graph ip4-vxlan-gbp-bypass} + * Name Next Previous + * ip4-vxlan-gbp-bypass error-drop [0] ip4-input + * vxlan4-gbp-input [1] ip4-input-no-checksum + * ip4-lookup [2] + * @cliexend + * + * Example of how to display the feature enabled on an interface: + * @cliexstart{show ip interface features GigabitEthernet2/0/0} + * IP feature paths configured on GigabitEthernet2/0/0... + * ... + * ipv4 unicast: + * ip4-vxlan-gbp-bypass + * ip4-lookup + * ... 
+ * @cliexend + * + * Example of how to disable ip4-vxlan-gbp-bypass on an interface: + * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0 del} + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_vxlan_gbp_bypass_command, static) = { + .path = "set interface ip vxlan-gbp-bypass", + .function = set_ip4_vxlan_gbp_bypass, + .short_help = "set interface ip vxlan-gbp-bypass <interface> [del]", +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_ip6_vxlan_gbp_bypass (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + return set_ip_vxlan_gbp_bypass (1, input, cmd); +} + +/*? + * This command adds the 'ip6-vxlan-gbp-bypass' graph node for a given interface. + * By adding the IPv6 vxlan-gbp-bypass graph node to an interface, the node checks + * for and validates input vxlan_gbp packets and bypasses the ip6-lookup, ip6-local, + * ip6-udp-lookup nodes to speed up vxlan_gbp packet forwarding. This node will + * cause extra overhead for non-vxlan_gbp packets, which is kept at a minimum. + * + * @cliexpar + * @parblock + * Example of graph node before ip6-vxlan-gbp-bypass is enabled: + * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass} + * Name Next Previous + * ip6-vxlan-gbp-bypass error-drop [0] + * vxlan6-gbp-input [1] + * ip6-lookup [2] + * @cliexend + * + * Example of how to enable ip6-vxlan-gbp-bypass on an interface: + * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0} + * + * Example of graph node after ip6-vxlan-gbp-bypass is enabled: + * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass} + * Name Next Previous + * ip6-vxlan-gbp-bypass error-drop [0] ip6-input + * vxlan6-gbp-input [1] ip4-input-no-checksum + * ip6-lookup [2] + * @cliexend + * + * Example of how to display the feature enabled on an interface: + * @cliexstart{show ip interface features GigabitEthernet2/0/0} + * IP feature paths configured on GigabitEthernet2/0/0... + * ... 
+ * ipv6 unicast: + * ip6-vxlan-gbp-bypass + * ip6-lookup + * ... + * @cliexend + * + * Example of how to disable ip6-vxlan-gbp-bypass on an interface: + * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0 del} + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gbp_bypass_command, static) = { + .path = "set interface ip6 vxlan-gbp-bypass", + .function = set_ip6_vxlan_gbp_bypass, + .short_help = "set interface ip6 vxlan-gbp-bypass <interface> [del]", +}; +/* *INDENT-ON* */ + +#define VXLAN_GBP_HASH_NUM_BUCKETS (2 * 1024) +#define VXLAN_GBP_HASH_MEMORY_SIZE (1 << 20) + +clib_error_t * +vxlan_gbp_init (vlib_main_t * vm) +{ + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + + vxm->vnet_main = vnet_get_main (); + vxm->vlib_main = vm; + + /* initialize the ip4 and ip6 tunnel lookup hashes */ + clib_bihash_init_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, "vxlan4-gbp", + VXLAN_GBP_HASH_NUM_BUCKETS, + VXLAN_GBP_HASH_MEMORY_SIZE); + clib_bihash_init_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, "vxlan6-gbp", + VXLAN_GBP_HASH_NUM_BUCKETS, + VXLAN_GBP_HASH_MEMORY_SIZE); + vxm->vtep6 = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); + vxm->mcast_shared = hash_create_mem (0, + sizeof (ip46_address_t), + sizeof (mcast_shared_t)); + + udp_register_dst_port (vm, UDP_DST_PORT_vxlan_gbp, + vxlan4_gbp_input_node.index, /* is_ip4 */ 1); + udp_register_dst_port (vm, UDP_DST_PORT_vxlan6_gbp, + vxlan6_gbp_input_node.index, /* is_ip4 */ 0); + + fib_node_register_type (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, &vxlan_gbp_vft); + + return 0; +} + +VLIB_INIT_FUNCTION (vxlan_gbp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.h b/src/vnet/vxlan-gbp/vxlan_gbp.h new file mode 100644 index 00000000000..bc0ccf308b6 --- /dev/null +++ b/src/vnet/vxlan-gbp/vxlan_gbp.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_vxlan_gbp_h +#define included_vnet_vxlan_gbp_h + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/bihash_16_8.h> +#include <vppinfra/bihash_24_8.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/l2_output.h> +#include <vnet/l2/l2_bd.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan-gbp/vxlan_gbp_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/udp/udp.h> +#include <vnet/dpo/dpo.h> +#include <vnet/adj/adj_types.h> + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + udp_header_t udp; /* 8 bytes */ + vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */ +}) ip4_vxlan_gbp_header_t; + +typedef CLIB_PACKED (struct { + ip6_header_t ip6; /* 40 bytes */ + udp_header_t udp; /* 8 bytes */ + vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */ +}) ip6_vxlan_gbp_header_t; +/* *INDENT-ON* */ + +/* +* Key fields: remote ip, vni on incoming VXLAN packet +* all fields in NET byte order +*/ +typedef clib_bihash_kv_16_8_t vxlan4_gbp_tunnel_key_t; + +/* +* Key fields: remote ip, vni and fib index on incoming VXLAN packet +* ip, vni fields in NET byte order +* fib index field in host byte order +*/ +typedef clib_bihash_kv_24_8_t vxlan6_gbp_tunnel_key_t; + +typedef struct +{ + /* Required for pool_get_aligned */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + 
+ /* FIB DPO for IP forwarding of VXLAN encap packet */ + dpo_id_t next_dpo; + + /* Group Policy ID */ + u16 sclass; + + /* flags */ + u16 flags; + + /* vxlan VNI in HOST byte order */ + u32 vni; + + /* tunnel src and dst addresses */ + ip46_address_t src; + ip46_address_t dst; + + /* mcast packet output intfc index (used only if dst is mcast) */ + u32 mcast_sw_if_index; + + /* decap next index */ + u32 decap_next_index; + + /* The FIB index for src/dst addresses */ + u32 encap_fib_index; + + /* vnet intfc index */ + u32 sw_if_index; + u32 hw_if_index; + + /** Next node after VxLAN-GBP encap */ + uword encap_next_node; + + /** + * Linkage into the FIB object graph + */ + fib_node_t node; + + /* + * The FIB entry for (depending on VXLAN-GBP tunnel is unicast or mcast) + * sending unicast VXLAN-GBP encap packets or receiving mcast VXLAN-GBP packets + */ + fib_node_index_t fib_entry_index; + adj_index_t mcast_adj_index; + + /** + * The tunnel is a child of the FIB entry for its destintion. This is + * so it receives updates when the forwarding information for that entry + * changes. + * The tunnels sibling index on the FIB entry's dependency list. 
+ */ + u32 sibling_index; + + u32 dev_instance; /* Real device instance in tunnel vector */ + u32 user_instance; /* Instance name being shown to user */ + + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); +} vxlan_gbp_tunnel_t; + +#define foreach_vxlan_gbp_input_next \ +_(DROP, "error-drop") \ +_(L2_INPUT, "l2-input") + +typedef enum +{ +#define _(s,n) VXLAN_GBP_INPUT_NEXT_##s, + foreach_vxlan_gbp_input_next +#undef _ + VXLAN_GBP_INPUT_N_NEXT, +} vxlan_gbp_input_next_t; + +typedef enum +{ +#define vxlan_gbp_error(n,s) VXLAN_GBP_ERROR_##n, +#include <vnet/vxlan-gbp/vxlan_gbp_error.def> +#undef vxlan_gbp_error + VXLAN_GBP_N_ERROR, +} vxlan_gbp_input_error_t; + +typedef struct +{ + /* vector of encap tunnel instances */ + vxlan_gbp_tunnel_t *tunnels; + + /* lookup tunnel by key */ + clib_bihash_16_8_t vxlan4_gbp_tunnel_by_key; /* keyed on ipv4.dst + fib + vni */ + clib_bihash_24_8_t vxlan6_gbp_tunnel_by_key; /* keyed on ipv6.dst + fib + vni */ + + /* local VTEP IPs ref count used by vxlan-bypass node to check if + received VXLAN packet DIP matches any local VTEP address */ + uword *vtep4; /* local ip4 VTEPs keyed on their ip4 addr */ + uword *vtep6; /* local ip6 VTEPs keyed on their ip6 addr */ + + /* mcast shared info */ + uword *mcast_shared; /* keyed on mcast ip46 addr */ + + /* Mapping from sw_if_index to tunnel index */ + u32 *tunnel_index_by_sw_if_index; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* Record used instances */ + uword *instance_used; +} vxlan_gbp_main_t; + +extern vxlan_gbp_main_t vxlan_gbp_main; + +extern vlib_node_registration_t vxlan4_gbp_input_node; +extern vlib_node_registration_t vxlan6_gbp_input_node; +extern vlib_node_registration_t vxlan4_gbp_encap_node; +extern vlib_node_registration_t vxlan6_gbp_encap_node; + +u8 *format_vxlan_gbp_encap_trace (u8 * s, va_list * args); + +typedef struct +{ + u8 is_add; + u8 is_ip6; + u32 instance; + ip46_address_t src, dst; + u16 sclass; + u32 mcast_sw_if_index; + u32 
encap_fib_index; + u32 decap_next_index; + u32 vni; +} vnet_vxlan_gbp_add_del_tunnel_args_t; + +int vnet_vxlan_gbp_add_del_tunnel + (vnet_vxlan_gbp_add_del_tunnel_args_t * a, u32 * sw_if_indexp); + +void vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6, + u8 is_enable); + +u32 vnet_vxlan_gbp_get_tunnel_index (u32 sw_if_index); +#endif /* included_vnet_vxlan_gbp_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_api.c b/src/vnet/vxlan-gbp/vxlan_gbp_api.c new file mode 100644 index 00000000000..b064a1f4392 --- /dev/null +++ b/src/vnet/vxlan-gbp/vxlan_gbp_api.c @@ -0,0 +1,242 @@ +/* + *------------------------------------------------------------------ + * vxlan_gbp_api.c - vxlan gbp api + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/feature/feature.h> +#include <vnet/vxlan-gbp/vxlan_gbp.h> +#include <vnet/fib/fib_table.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(SW_INTERFACE_SET_VXLAN_GBP_BYPASS, sw_interface_set_vxlan_gbp_bypass) \ +_(VXLAN_GBP_ADD_DEL_TUNNEL, vxlan_gbp_add_del_tunnel) \ +_(VXLAN_GBP_TUNNEL_DUMP, vxlan_gbp_tunnel_dump) + +static void + vl_api_sw_interface_set_vxlan_gbp_bypass_t_handler + (vl_api_sw_interface_set_vxlan_gbp_bypass_t * mp) +{ + vl_api_sw_interface_set_vxlan_gbp_bypass_reply_t *rmp; + int rv = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + vnet_int_vxlan_gbp_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable); + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GBP_BYPASS_REPLY); +} + +static void vl_api_vxlan_gbp_add_del_tunnel_t_handler + (vl_api_vxlan_gbp_add_del_tunnel_t * mp) +{ + vl_api_vxlan_gbp_add_del_tunnel_reply_t *rmp; + int rv = 0; + u32 fib_index; + + fib_index = fib_table_find (fib_ip_proto (mp->is_ipv6), + ntohl (mp->encap_vrf_id)); + if (fib_index == ~0) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + + vnet_vxlan_gbp_add_del_tunnel_args_t a = { + .is_add = mp->is_add, + .is_ip6 = mp->is_ipv6, + .instance = ntohl (mp->instance), + .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index), + 
.encap_fib_index = fib_index, + .decap_next_index = ntohl (mp->decap_next_index), + .vni = ntohl (mp->vni), + .sclass = ntohs (mp->sclass), + .dst = to_ip46 (mp->is_ipv6, mp->dst_address), + .src = to_ip46 (mp->is_ipv6, mp->src_address), + }; + + /* Check src & dst are different */ + if (ip46_address_cmp (&a.dst, &a.src) == 0) + { + rv = VNET_API_ERROR_SAME_SRC_DST; + goto out; + } + if (ip46_address_is_multicast (&a.dst) && + !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index)) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto out; + } + + u32 sw_if_index = ~0; + rv = vnet_vxlan_gbp_add_del_tunnel (&a, &sw_if_index); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_VXLAN_GBP_ADD_DEL_TUNNEL_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void send_vxlan_gbp_tunnel_details + (vxlan_gbp_tunnel_t * t, vl_api_registration_t * reg, u32 context) +{ + vl_api_vxlan_gbp_tunnel_details_t *rmp; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst); + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GBP_TUNNEL_DETAILS); + if (is_ipv6) + { + memcpy (rmp->src_address, t->src.ip6.as_u8, 16); + memcpy (rmp->dst_address, t->dst.ip6.as_u8, 16); + rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); + } + else + { + memcpy (rmp->src_address, t->src.ip4.as_u8, 4); + memcpy (rmp->dst_address, t->dst.ip4.as_u8, 4); + rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); + } + + rmp->instance = htonl (t->user_instance); + rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index); + rmp->vni = htonl (t->vni); + rmp->sclass = htons (t->sclass); + rmp->decap_next_index = htonl (t->decap_next_index); + rmp->sw_if_index = htonl (t->sw_if_index); + rmp->is_ipv6 = is_ipv6; + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void vl_api_vxlan_gbp_tunnel_dump_t_handler 
+ (vl_api_vxlan_gbp_tunnel_dump_t * mp) +{ + vl_api_registration_t *reg; + vxlan_gbp_main_t *vxm = &vxlan_gbp_main; + vxlan_gbp_tunnel_t *t; + u32 sw_if_index; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + sw_if_index = ntohl (mp->sw_if_index); + + if (~0 == sw_if_index) + { + /* sw_if_index of ~0: send details for every tunnel in the pool */ + /* *INDENT-OFF* */ + pool_foreach (t, vxm->tunnels, + ({ + send_vxlan_gbp_tunnel_details(t, reg, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + /* index out of range or not mapped to a tunnel: send nothing */ + if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || + (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) + { + return; + } + t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; + send_vxlan_gbp_tunnel_details (t, reg, mp->context); + } +} + +/* + * vxlan_gbp_api_hookup + * Add the vxlan-gbp API message handlers to the table. + * vlib has already mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +/* Register a "<name>_<crc>" string for each vxlan-gbp message id. */ +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_vxlan_gbp; +#undef _ +} + +static clib_error_t * +vxlan_gbp_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + + /* one vl_msg_api_set_handlers() call per entry of foreach_vpe_api_msg */ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (vxlan_gbp_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_error.def b/src/vnet/vxlan-gbp/vxlan_gbp_error.def new file mode 100644 index 
00000000000..43ad4dac064 --- /dev/null +++ b/src/vnet/vxlan-gbp/vxlan_gbp_error.def @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +vxlan_gbp_error (DECAPSULATED, "good packets decapsulated") +vxlan_gbp_error (NO_SUCH_TUNNEL, "no such tunnel packets") +vxlan_gbp_error (BAD_FLAGS, "packets with bad flags field in vxlan gbp header") diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_packet.h b/src/vnet/vxlan-gbp/vxlan_gbp_packet.h new file mode 100644 index 00000000000..e1674a0dba8 --- /dev/null +++ b/src/vnet/vxlan-gbp/vxlan_gbp_packet.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_vxlan_gbp_packet_h__ +#define __included_vxlan_gbp_packet_h__ 1 + +/* + * From draft-smith-vxlan-group-policy-04.txt + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |G|R|R|R|I|R|R|R|R|D|E|S|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * G bit: Bit 0 of the initial word is defined as the G (Group Based + * Policy Extension) bit. + * + * I bit: where the I flag MUST be set to 1 for a valid + * VXLAN Network ID (VNI). + * + * D bit: Bit 9 of the initial word is defined as the Don't Learn bit. + * When set, this bit indicates that the egress VTEP MUST NOT learn the + * source address of the encapsulated frame. + * + * E bit: Bit 10 of the initial word is defined as the bounce packet. + * When set, this bit indicates that packet is bounced and must be + * dropped. + * + * S bit: Bit 11 of the initial word is defined as the source policy + * applied bit. + * + * A bit: Bit 12 of the initial word is defined as the A (Policy + * Applied) bit. This bit is only defined as the A bit when the G bit + * is set to 1. + * + * A = 1 indicates that the group policy has already been applied to + * this packet. Policies MUST NOT be applied by devices when the A + * bit is set. + * + * A = 0 indicates that the group policy has not been applied to this + * packet. Group policies MUST be applied by devices when the A bit + * is set to 0 and the destination Group has been determined. + * Devices that apply the Group policy MUST set the A bit to 1 after + * the policy has been applied. + * + * Group Policy ID: 16 bit identifier that indicates the source TSI + * Group membership being encapsulated by VXLAN. Its value is source + * class id. 
+ * + */ + +typedef struct +{ + union + { + struct + { + union + { + struct + { + u8 flag_g_i; + u8 gpflags; + }; + u16 flags; + }; + u16 sclass; + }; + u32 flags_sclass_as_u32; + }; + u32 vni_reserved; +} vxlan_gbp_header_t; + +#define VXLAN_GBP_FLAGS_G 0x80 +#define VXLAN_GBP_FLAGS_I 0x08 + +#define foreach_vxlan_gbp_gpflags \ +_ (0x40, D) \ +_ (0x20, E) \ +_ (0x10, S) \ +_ (0x08, A) + +typedef enum +{ +#define _(n,f) VXLAN_GBP_GPFLAGS_##f = n, + foreach_vxlan_gbp_gpflags +#undef _ +} vxlan_gbp_gpflag_t; + +static inline u32 +vxlan_gbp_get_vni (vxlan_gbp_header_t * h) +{ + u32 vni_reserved_host_byte_order; + + vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved); + return vni_reserved_host_byte_order >> 8; +} + +static inline u16 +vxlan_gbp_get_sclass (vxlan_gbp_header_t * h) +{ + u16 sclass_host_byte_order; + + sclass_host_byte_order = clib_net_to_host_u16 (h->sclass); + return sclass_host_byte_order; +} + +static inline u8 +vxlan_gbp_get_gpflags (vxlan_gbp_header_t * h) +{ + return h->gpflags; +} + +static inline u8 +vxlan_gbp_get_flags (vxlan_gbp_header_t * h) +{ + return h->flag_g_i; +} + +static inline void +vxlan_gbp_set_header (vxlan_gbp_header_t * h, u32 vni) +{ + h->vni_reserved = clib_host_to_net_u32 (vni << 8); + h->flags_sclass_as_u32 = 0; + h->flag_g_i = VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G; +} + +#endif /* __included_vxlan_gbp_packet_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/test/patches/scapy-2.3.3/vxlan.patch b/test/patches/scapy-2.3.3/vxlan.patch index 556df4dd70a..71493dd742c 100644 --- a/test/patches/scapy-2.3.3/vxlan.patch +++ b/test/patches/scapy-2.3.3/vxlan.patch @@ -8,4 +8,5 @@ diff --git a/scapy/layers/vxlan.py b/scapy/layers/vxlan.py +bind_layers(UDP, VXLAN, dport=4790) # RFC standard vxlan-gpe port bind_layers(UDP, VXLAN, dport=6633) # New IANA assigned port for use with NSH bind_layers(UDP, VXLAN, dport=8472) # Linux 
implementation port ++bind_layers(UDP, VXLAN, dport=48879) # RFC standard vxlan-gbp port bind_layers(VXLAN, Ether, {'flags': 0x8}) diff --git a/test/patches/scapy-2.4/vxlan.py.patch b/test/patches/scapy-2.4/vxlan.py.patch new file mode 100644 index 00000000000..c5876a4adbd --- /dev/null +++ b/test/patches/scapy-2.4/vxlan.py.patch @@ -0,0 +1,12 @@ +diff --git a/scapy/layers/vxlan.py b/scapy/layers/vxlan.py +index 03c3a5a..ee30f28 100644 +--- a/scapy/layers/vxlan.py ++++ b/scapy/layers/vxlan.py +@@ -69,6 +69,7 @@ bind_layers(UDP, VXLAN, dport=4789) # RFC standard vxlan port + bind_layers(UDP, VXLAN, dport=4790) # RFC standard vxlan-gpe port + bind_layers(UDP, VXLAN, dport=6633) # New IANA assigned port for use with NSH + bind_layers(UDP, VXLAN, dport=8472) # Linux implementation port ++bind_layers(UDP, VXLAN, dport=48879) # iVXLAN port + bind_layers(UDP, VXLAN, sport=4789) + bind_layers(UDP, VXLAN, sport=4790) + bind_layers(UDP, VXLAN, sport=6633) diff --git a/test/test_gbp.py b/test/test_gbp.py index b6faa12c6a6..894690b216d 100644 --- a/test/test_gbp.py +++ b/test/test_gbp.py @@ -161,7 +161,7 @@ class VppGbpSubnet(VppObject): self.address_n, self.address_len, sw_if_index=self.sw_if_index if self.sw_if_index else 0xffffffff, - epg_id=self.epg if self.epg else 0xffffffff, + epg_id=self.epg if self.epg else 0xffff, is_ip6=self.is_ip6) self._test.registry.register(self, self._test.logger) diff --git a/test/test_vxlan_gbp.py b/test/test_vxlan_gbp.py new file mode 100644 index 00000000000..55594a59fa7 --- /dev/null +++ b/test/test_vxlan_gbp.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python + +import socket +from util import ip4n_range, ip4_range +import unittest +from framework import VppTestCase, VppTestRunner +from template_bd import BridgeDomain + +from scapy.layers.l2 import Ether, Raw +from scapy.layers.inet import IP, UDP +from scapy.layers.vxlan import VXLAN +from scapy.utils import atol + +import StringIO + + +def reassemble(listoffragments): + buffer = 
StringIO.StringIO() + first = listoffragments[0] + buffer.seek(20) + for pkt in listoffragments: + buffer.seek(pkt[IP].frag*8) + buffer.write(pkt[IP].payload) + first.len = len(buffer.getvalue()) + 20 + first.flags = 0 + del(first.chksum) + header = str(first[Ether])[:34] + return first[Ether].__class__(header + buffer.getvalue()) + + +class TestVxlanGbp(VppTestCase): + """ VXLAN GBP Test Case """ + + @property + def frame_request(self): + """ Ethernet frame modeling a generic request """ + return (Ether(src='00:00:00:00:00:01', dst='00:00:00:00:00:02') / + IP(src='1.2.3.4', dst='4.3.2.1') / + UDP(sport=10000, dport=20000) / + Raw('\xa5' * 100)) + + @property + def frame_reply(self): + """ Ethernet frame modeling a generic reply """ + return (Ether(src='00:00:00:00:00:02', dst='00:00:00:00:00:01') / + IP(src='4.3.2.1', dst='1.2.3.4') / + UDP(sport=20000, dport=10000) / + Raw('\xa5' * 100)) + + def encapsulate(self, pkt, vni): + """ + Encapsulate the original payload frame by adding VXLAN GBP header with + its UDP, IP and Ethernet fields + """ + return (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / + UDP(sport=self.dport, dport=self.dport, chksum=0) / + VXLAN(vni=vni, flags=self.flags, gpflags=self.gpflags, + gpid=self.sclass) / pkt) + + def ip_range(self, start, end): + """ range of remote ip's """ + return ip4_range(self.pg0.remote_ip4, start, end) + + def decapsulate(self, pkt): + """ + Decapsulate the original payload frame by removing VXLAN header + """ + # check if is set G and I flag + self.assertEqual(pkt[VXLAN].flags, int('0x88', 16)) + return pkt[VXLAN].payload + + # Method for checking VXLAN GBP encapsulation. + # + def check_encapsulation(self, pkt, vni, local_only=False, mcast_pkt=False): + # TODO: add error messages + # Verify source MAC is VPP_MAC and destination MAC is MY_MAC resolved + # by VPP using ARP. 
+ self.assertEqual(pkt[Ether].src, self.pg0.local_mac) + if not local_only: + if not mcast_pkt: + self.assertEqual(pkt[Ether].dst, self.pg0.remote_mac) + else: + self.assertEqual(pkt[Ether].dst, type(self).mcast_mac) + # Verify VXLAN GBP tunnel source IP is VPP_IP and destination IP is + # MY_IP. + self.assertEqual(pkt[IP].src, self.pg0.local_ip4) + if not local_only: + if not mcast_pkt: + self.assertEqual(pkt[IP].dst, self.pg0.remote_ip4) + else: + self.assertEqual(pkt[IP].dst, type(self).mcast_ip4) + # Verify UDP destination port is VXLAN GBP 48879, source UDP port could + # be arbitrary. + self.assertEqual(pkt[UDP].dport, type(self).dport) + # TODO: checksum check + # Verify VNI + # pkt.show() + self.assertEqual(pkt[VXLAN].vni, vni) + # Verify Source Class + self.assertEqual(pkt[VXLAN].gpid, 0) + + @classmethod + def create_vxlan_gbp_flood_test_bd(cls, vni, n_ucast_tunnels): + # Create n_ucast_tunnels ucast vxlan tunnels under bd + ip_range_start = 10 + ip_range_end = ip_range_start + n_ucast_tunnels + next_hop_address = cls.pg0.remote_ip4n + for dest_ip4n in ip4n_range(next_hop_address, ip_range_start, + ip_range_end): + # add host route so dest_ip4n will not be resolved + cls.vapi.ip_add_del_route(dest_ip4n, 32, next_hop_address) + r = cls.vapi.vxlan_gbp_add_del_tunnel( + src_addr=cls.pg0.local_ip4n, + dst_addr=dest_ip4n, + vni=vni) + cls.vapi.sw_interface_set_l2_bridge(r.sw_if_index, bd_id=vni) + + # Class method to start the VXLAN GBP test case. + # Overrides setUpClass method in VppTestCase class. + # Python try..except statement is used to ensure that the tear down of + # the class will be executed even if exception is raised. + # @param cls The class pointer. + @classmethod + def setUpClass(cls): + super(TestVxlanGbp, cls).setUpClass() + + try: + cls.dport = 48879 + cls.flags = 0x88 + cls.gpflags = 0x0 + cls.sclass = 0 + + # Create 4 pg interfaces. 
+ cls.create_pg_interfaces(range(4)) + for pg in cls.pg_interfaces: + pg.admin_up() + + # Configure IPv4 addresses on VPP pg0. + cls.pg0.config_ip4() + + # Resolve MAC address for VPP's IP address on pg0. + cls.pg0.resolve_arp() + + # Create VXLAN GBP VTEP on VPP pg0, and put vxlan_gbp_tunnel0 and + # pg1 into BD. + cls.single_tunnel_bd = 1 + r = cls.vapi.vxlan_gbp_add_del_tunnel( + src_addr=cls.pg0.local_ip4n, + dst_addr=cls.pg0.remote_ip4n, + vni=cls.single_tunnel_bd) + cls.vapi.sw_interface_set_l2_bridge(r.sw_if_index, + bd_id=cls.single_tunnel_bd) + cls.vapi.sw_interface_set_l2_bridge(cls.pg1.sw_if_index, + bd_id=cls.single_tunnel_bd) + + # Setup vni 2 to test multicast flooding + cls.n_ucast_tunnels = 2 + # Setup vni 3 to test unicast flooding + cls.ucast_flood_bd = 3 + cls.create_vxlan_gbp_flood_test_bd(cls.ucast_flood_bd, + cls.n_ucast_tunnels) + cls.vapi.sw_interface_set_l2_bridge(cls.pg3.sw_if_index, + bd_id=cls.ucast_flood_bd) + except Exception: + super(TestVxlanGbp, cls).tearDownClass() + raise + + def assert_eq_pkts(self, pkt1, pkt2): + """ Verify the Ether, IP, UDP, payload are equal in both + packets + """ + self.assertEqual(pkt1[Ether].src, pkt2[Ether].src) + self.assertEqual(pkt1[Ether].dst, pkt2[Ether].dst) + self.assertEqual(pkt1[IP].src, pkt2[IP].src) + self.assertEqual(pkt1[IP].dst, pkt2[IP].dst) + self.assertEqual(pkt1[UDP].sport, pkt2[UDP].sport) + self.assertEqual(pkt1[UDP].dport, pkt2[UDP].dport) + self.assertEqual(pkt1[Raw], pkt2[Raw]) + + def test_decap(self): + """ Decapsulation test + Send encapsulated frames from pg0 + Verify receipt of decapsulated frames on pg1 + """ + encapsulated_pkt = self.encapsulate(self.frame_request, + self.single_tunnel_bd) + + self.pg0.add_stream([encapsulated_pkt, ]) + + self.pg1.enable_capture() + + self.pg_start() + + # Pick first received frame and check if it's the non-encapsulated + # frame + out = self.pg1.get_capture(1) + pkt = out[0] + self.assert_eq_pkts(pkt, self.frame_request) + + def 
test_encap(self): + """ Encapsulation test + Send frames from pg1 + Verify receipt of encapsulated frames on pg0 + """ + self.pg1.add_stream([self.frame_reply]) + + self.pg0.enable_capture() + + self.pg_start() + + # Pick first received frame and check if it's correctly encapsulated. + out = self.pg0.get_capture(1) + pkt = out[0] + self.check_encapsulation(pkt, self.single_tunnel_bd) + + payload = self.decapsulate(pkt) + self.assert_eq_pkts(payload, self.frame_reply) + + def test_ucast_flood(self): + """ Unicast flood test + Send frames from pg3 + Verify receipt of encapsulated frames on pg0 + """ + self.pg3.add_stream([self.frame_reply]) + + self.pg0.enable_capture() + + self.pg_start() + + # Get packet from each tunnel and assert it's correctly encapsulated. + out = self.pg0.get_capture(self.n_ucast_tunnels) + for pkt in out: + self.check_encapsulation(pkt, self.ucast_flood_bd, True) + payload = self.decapsulate(pkt) + self.assert_eq_pkts(payload, self.frame_reply) + + def test_encap_big_packet(self): + """ Encapsulation test send big frame from pg1 + Verify receipt of encapsulated frames on pg0 + """ + + self.vapi.sw_interface_set_mtu(self.pg0.sw_if_index, [1500, 0, 0, 0]) + + frame = (Ether(src='00:00:00:00:00:02', dst='00:00:00:00:00:01') / + IP(src='4.3.2.1', dst='1.2.3.4') / + UDP(sport=20000, dport=10000) / + Raw('\xa5' * 1450)) + + self.pg1.add_stream([frame]) + + self.pg0.enable_capture() + + self.pg_start() + + # Capture the two frames sent on pg0, reassemble them and check the + # result is correctly encapsulated. + out = self.pg0.get_capture(2) + pkt = reassemble(out) + self.check_encapsulation(pkt, self.single_tunnel_bd) + + payload = self.decapsulate(pkt) + self.assert_eq_pkts(payload, frame) + +# Method to define VPP actions before tear down of the test case. +# Overrides tearDown method in VppTestCase class. +# @param self The object pointer. 
+ def tearDown(self): + super(TestVxlanGbp, self).tearDown() + if not self.vpp_dead: + self.logger.info(self.vapi.cli("show bridge-domain 1 detail")) + self.logger.info(self.vapi.cli("show bridge-domain 3 detail")) + self.logger.info(self.vapi.cli("show vxlan-gbp tunnel")) + self.logger.info(self.vapi.cli("show error")) + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 9a3a9741ed9..b575b8ad6ac 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -2723,6 +2723,41 @@ class VppPapiProvider(object): 'protocol': protocol, 'vni': vni}) + def vxlan_gbp_add_del_tunnel( + self, + src_addr, + dst_addr, + mcast_sw_if_index=0xFFFFFFFF, + is_add=1, + is_ipv6=0, + encap_vrf_id=0, + decap_next_index=0xFFFFFFFF, + vni=0, + instance=0xFFFFFFFF): + """ + + :param dst_addr: + :param src_addr: + :param is_add: (Default value = 1) + :param is_ipv6: (Default value = 0) + :param encap_vrf_id: (Default value = 0) + :param decap_next_index: (Default value = 0xFFFFFFFF) + :param mcast_sw_if_index: (Default value = 0xFFFFFFFF) + :param vni: (Default value = 0) + :param instance: (Default value = 0xFFFFFFFF) + + """ + return self.api(self.papi.vxlan_gbp_add_del_tunnel, + {'is_add': is_add, + 'is_ipv6': is_ipv6, + 'src_address': src_addr, + 'dst_address': dst_addr, + 'mcast_sw_if_index': mcast_sw_if_index, + 'encap_vrf_id': encap_vrf_id, + 'decap_next_index': decap_next_index, + 'vni': vni, + 'instance': instance}) + def pppoe_add_del_session( self, client_ip, @@ -3446,7 +3481,7 @@ class VppPapiProvider(object): is_internal, addr, addr_len, sw_if_index=0xffffffff, - epg_id=0xffffffff, + epg_id=0xffff, is_ip6=False): """ GBP Subnet Add/Del """ return self.api(self.papi.gbp_subnet_add_del, |