From 1a1adc7121e698743a64141373966041efc5f2e7 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 22 Jul 2016 01:45:30 +0200 Subject: Initial L2 LISP support This introduces support for layer 2 overlays with LISP. Similarly to L3, all tenant packets to be encapsulated are captured by an interface, but the mapping (layer binding), instead of being between an L3 VRF and a LISP VNI, is between an L2 bridge domain and a VNI. At a high level, this results in two important properties: 1) the sources and destinations of all packets flooded in the bridge-domain are mapped via the LISP control plane and the replies are converted into data-plane tunnels tracked via a LISP-specific source/dest L2 FIB 2) All packets reaching the interface and matching a source/dest L2 LISP FIB entry are L3 (IP4/6) encapsulated. This is solely a unicast feature, therefore at this time ARPs are not handled in any special way. Change-Id: I0b7badcd7c6d5166db07d4acd2cc4ae7fba3e18e Signed-off-by: Florin Coras --- vnet/vnet/buffer.h | 6 + vnet/vnet/l2/l2_input.h | 2 +- vnet/vnet/lisp-cp/control.c | 279 +++++++++++------ vnet/vnet/lisp-cp/control.h | 13 +- vnet/vnet/lisp-cp/gid_dictionary.c | 2 +- vnet/vnet/lisp-cp/lisp_cp_messages.h | 14 +- vnet/vnet/lisp-cp/lisp_msg_serdes.c | 16 +- vnet/vnet/lisp-cp/lisp_types.c | 12 +- vnet/vnet/lisp-cp/lisp_types.h | 6 +- vnet/vnet/lisp-cp/packets.h | 3 + vnet/vnet/lisp-gpe/decap.c | 45 ++- vnet/vnet/lisp-gpe/interface.c | 559 +++++++++++++++++++++++++++++------ vnet/vnet/lisp-gpe/ip_forward.c | 32 ++ vnet/vnet/lisp-gpe/lisp_gpe.c | 274 +++++++++++++---- vnet/vnet/lisp-gpe/lisp_gpe.h | 113 ++++--- vpp-api-test/vat/api_format.c | 11 +- vpp/vpp-api/api.c | 9 +- vpp/vpp-api/vpe.api | 7 +- 18 files changed, 1086 insertions(+), 317 deletions(-) diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h index f74be393df1..742fe32b25f 100644 --- a/vnet/vnet/buffer.h +++ b/vnet/vnet/buffer.h @@ -293,6 +293,12 @@ typedef struct { u32 current_config_index; } cop; + /* 
LISP */ + struct { + /* overlay address family */ + u16 overlay_afi; + } lisp; + u32 unused[6]; }; } vnet_buffer_opaque_t; diff --git a/vnet/vnet/l2/l2_input.h b/vnet/vnet/l2/l2_input.h index f04075549aa..9ebfb39d0d3 100644 --- a/vnet/vnet/l2/l2_input.h +++ b/vnet/vnet/l2/l2_input.h @@ -62,7 +62,7 @@ typedef struct { /* config vector indexed by sw_if_index */ l2_input_config_t *configs; - /* bridge domain config vector indexed by BD ID */ + /* bridge domain config vector indexed by bd_index */ l2_bridge_domain_t *bd_configs; /* convenience variables */ diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c index 4e955c4de47..5ef7889d2a7 100644 --- a/vnet/vnet/lisp-cp/control.c +++ b/vnet/vnet/lisp-cp/control.c @@ -142,20 +142,32 @@ ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst, } static int -dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_add) +dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_l2, u8 is_add) { - uword * table_id, * intf; + uword * dp_table, * intf; vnet_lisp_gpe_add_del_iface_args_t _ai, *ai = &_ai; - table_id = hash_get(lcm->table_id_by_vni, vni); + if (!is_l2) + { + dp_table = hash_get(lcm->table_id_by_vni, vni); - if (!table_id) + if (!dp_table) + { + clib_warning("vni %d not associated to a vrf!", vni); + return VNET_API_ERROR_INVALID_VALUE; + } + } + else { - clib_warning ("vni %d not associated to a vrf!", vni); - return VNET_API_ERROR_INVALID_VALUE; + dp_table = hash_get(lcm->bd_id_by_vni, vni); + if (!dp_table) + { + clib_warning("vni %d not associated to a bridge domain!", vni); + return VNET_API_ERROR_INVALID_VALUE; + } } - intf = hash_get(lcm->dp_intf_by_vni, vni); + intf = hash_get(is_l2 ? 
lcm->l2_dp_intf_by_vni :lcm->dp_intf_by_vni, vni); /* enable/disable data-plane interface */ if (is_add) @@ -165,7 +177,9 @@ dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_add) { ai->is_add = 1; ai->vni = vni; - ai->table_id = table_id[0]; + ai->is_l2 = is_l2; + ai->dp_table = dp_table[0]; + vnet_lisp_gpe_add_del_iface (ai, 0); /* keep track of vnis for which interfaces have been created */ @@ -182,7 +196,7 @@ dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_add) ai->is_add = 0; ai->vni = vni; - ai->table_id = table_id[0]; + ai->dp_table = dp_table[0]; vnet_lisp_gpe_add_del_iface (ai, 0); hash_unset(lcm->dp_intf_by_vni, vni); } @@ -207,10 +221,10 @@ dp_del_fwd_entry (lisp_cp_main_t * lcm, u32 src_map_index, u32 dst_map_index) /* delete dp fwd entry */ u32 sw_if_index; a->is_add = 0; - a->dlocator = fe->dst_loc; - a->slocator = fe->src_loc; - a->vni = gid_address_vni(&a->deid); - gid_address_copy(&a->deid, &fe->deid); + a->rmt_loc = fe->dst_loc; + a->lcl_loc = fe->src_loc; + a->vni = gid_address_vni(&a->rmt_eid); + gid_address_copy(&a->rmt_eid, &fe->deid); vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index); @@ -305,11 +319,12 @@ get_locator_pair (lisp_cp_main_t* lcm, mapping_t * lcl_map, mapping_t * rmt_map, static void dp_add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) { + vnet_lisp_gpe_add_del_fwd_entry_args_t _a, * a = &_a; mapping_t * src_map, * dst_map; u32 sw_if_index; - uword * feip = 0, * tidp; + uword * feip = 0, * dpid; fwd_entry_t* fe; - vnet_lisp_gpe_add_del_fwd_entry_args_t _a, * a = &_a; + u8 type; memset (a, 0, sizeof(*a)); @@ -321,22 +336,37 @@ dp_add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) src_map = pool_elt_at_index (lcm->mapping_pool, src_map_index); dst_map = pool_elt_at_index (lcm->mapping_pool, dst_map_index); - gid_address_copy (&a->deid, &dst_map->eid); - a->vni = gid_address_vni(&a->deid); + /* insert data plane forwarding entry */ + a->is_add = 1; - tidp = 
hash_get(lcm->table_id_by_vni, a->vni); - if (!tidp) + gid_address_copy (&a->rmt_eid, &dst_map->eid); + a->vni = gid_address_vni(&a->rmt_eid); + + /* get vrf or bd_index associated to vni */ + type = gid_address_type(&dst_map->eid); + if (GID_ADDR_IP_PREFIX == type) { - clib_warning("vni %d not associated to a vrf!", a->vni); - return; + dpid = hash_get(lcm->table_id_by_vni, a->vni); + if (!dpid) + { + clib_warning("vni %d not associated to a vrf!", a->vni); + return; + } + a->table_id = dpid[0]; + } + else if (GID_ADDR_MAC == type) + { + dpid = hash_get(lcm->bd_id_by_vni, a->vni); + if (!dpid) + { + clib_warning("vni %d not associated to a bridge domain !", a->vni); + return; + } + a->bd_id = dpid[0]; } - a->table_id = tidp[0]; - - /* insert data plane forwarding entry */ - a->is_add = 1; /* find best locator pair that 1) verifies LISP policy 2) are connected */ - if (0 == get_locator_pair (lcm, src_map, dst_map, &a->slocator, &a->dlocator)) + if (0 == get_locator_pair (lcm, src_map, dst_map, &a->lcl_loc, &a->rmt_loc)) { /* negative entry */ a->is_negative = 1; @@ -344,7 +374,7 @@ dp_add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) } /* TODO remove */ - u8 ipver = ip_prefix_version(&gid_address_ippref(&a->deid)); + u8 ipver = ip_prefix_version(&gid_address_ippref(&a->rmt_eid)); a->decap_next_index = (ipver == IP4) ? 
LISP_GPE_INPUT_NEXT_IP4_INPUT : LISP_GPE_INPUT_NEXT_IP6_INPUT; @@ -352,9 +382,9 @@ dp_add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) /* add tunnel to fwd entry table XXX check return value from DP insertion */ pool_get (lcm->fwd_entry_pool, fe); - fe->dst_loc = a->dlocator; - fe->src_loc = a->slocator; - gid_address_copy (&fe->deid, &a->deid); + fe->dst_loc = a->rmt_loc; + fe->src_loc = a->lcl_loc; + gid_address_copy (&fe->deid, &a->rmt_eid); hash_set (lcm->fwd_entry_by_mapping_index, dst_map_index, fe - lcm->fwd_entry_pool); } @@ -465,8 +495,9 @@ int vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a, u32 * map_index_result) { - uword * table_id; + uword * dp_table = 0; u32 vni; + u8 type; lisp_cp_main_t * lcm = vnet_lisp_cp_get_main (); @@ -477,11 +508,16 @@ vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a, } vni = gid_address_vni(&a->eid); - table_id = hash_get(lcm->table_id_by_vni, vni); + type = gid_address_type(&a->eid); + if (GID_ADDR_IP_PREFIX == type) + dp_table = hash_get(lcm->table_id_by_vni, vni); + else if (GID_ADDR_MAC == type) + dp_table = hash_get(lcm->bd_id_by_vni, vni); - if (!table_id) + if (!dp_table) { - clib_warning ("vni %d not associated to a vrf!", vni); + clib_warning("vni %d not associated to a %s!", vni, + GID_ADDR_IP_PREFIX == type ? "vrf" : "bd"); return VNET_API_ERROR_INVALID_VALUE; } @@ -577,10 +613,10 @@ VLIB_CLI_COMMAND (lisp_add_del_local_eid_command) = { }; int -vnet_lisp_eid_table_map (u32 vni, u32 vrf, u8 is_add) +vnet_lisp_eid_table_map (u32 vni, u32 dp_id, u8 is_l2, u8 is_add) { lisp_cp_main_t * lcm = vnet_lisp_cp_get_main (); - uword * table_id, * vnip; + uword * dp_idp, * vnip, ** dp_table_by_vni, ** vni_by_dp_table; if (vnet_lisp_enable_disable_status () == 0) { @@ -588,44 +624,48 @@ vnet_lisp_eid_table_map (u32 vni, u32 vrf, u8 is_add) return -1; } - if (vni == 0 || vrf == 0) + dp_table_by_vni = is_l2 ? 
&lcm->bd_id_by_vni : &lcm->table_id_by_vni; + vni_by_dp_table = is_l2 ? &lcm->vni_by_bd_id : &lcm->vni_by_table_id; + + if (!is_l2 && (vni == 0 || dp_id == 0)) { clib_warning ("can't add/del default vni-vrf mapping!"); return -1; } - table_id = hash_get (lcm->table_id_by_vni, vni); - vnip = hash_get (lcm->vni_by_table_id, vrf); + dp_idp = hash_get (dp_table_by_vni[0], vni); + vnip = hash_get (vni_by_dp_table[0], dp_id); if (is_add) { - if (table_id || vnip) + if (dp_idp || vnip) { clib_warning ("vni %d or vrf %d already used in vrf/vni " - "mapping!", vni, vrf); + "mapping!", vni, dp_id); return -1; } - hash_set (lcm->table_id_by_vni, vni, vrf); - hash_set (lcm->vni_by_table_id, vrf, vni); + hash_set (dp_table_by_vni[0], vni, dp_id); + hash_set (vni_by_dp_table[0], dp_id, vni); /* create dp iface */ - dp_add_del_iface (lcm, vni, 1); + dp_add_del_iface (lcm, vni, is_l2, 1); } else { - if (!table_id || !vnip) + if (!dp_idp || !vnip) { clib_warning ("vni %d or vrf %d not used in any vrf/vni! " - "mapping!", vni, vrf); + "mapping!", vni, dp_id); return -1; } - hash_unset (lcm->table_id_by_vni, vni); - hash_unset (lcm->vni_by_table_id, vrf); + hash_unset (dp_table_by_vni[0], vni); + hash_unset (vni_by_dp_table[0], dp_id); /* remove dp iface */ - dp_add_del_iface (lcm, vni, 0); + dp_add_del_iface (lcm, vni, is_l2, 0); } return 0; + } static clib_error_t * @@ -633,8 +673,8 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 is_add = 1; - u32 vni = 0, vrf = 0; + u8 is_add = 1, is_l2 = 0; + u32 vni = 0, dp_id = 0; unformat_input_t _line_input, * line_input = &_line_input; /* Get a line of input. 
*/ @@ -647,20 +687,22 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, is_add = 0; else if (unformat (line_input, "vni %d", &vni)) ; - else if (unformat (line_input, "vrf %d", &vrf)) + else if (unformat (line_input, "vrf %d", &dp_id)) ; + else if (unformat (line_input, "bd %d", &dp_id)) + is_l2 = 1; else { return unformat_parse_error (line_input); } } - vnet_lisp_eid_table_map (vni, vrf, is_add); + vnet_lisp_eid_table_map (vni, dp_id, is_l2, is_add); return 0; } VLIB_CLI_COMMAND (lisp_eid_table_map_command) = { .path = "lisp eid-table map", - .short_help = "lisp eid-table map [del] vni vrf ", + .short_help = "lisp eid-table map [del] vni vrf | bd ", .function = lisp_eid_table_map_command_fn, }; @@ -985,13 +1027,13 @@ lisp_add_del_remote_mapping_command_fn (vlib_main_t * vm, else if (unformat (line_input, "action %s", &s)) { if (!strcmp ((char *)s, "no-action")) - action = ACTION_NONE; + action = LISP_NO_ACTION; if (!strcmp ((char *)s, "natively-forward")) - action = ACTION_NATIVELY_FORWARDED; + action = LISP_FORWARD_NATIVE; if (!strcmp ((char *)s, "send-map-request")) - action = ACTION_SEND_MAP_REQUEST; + action = LISP_SEND_MAP_REQUEST; else if (!strcmp ((char *)s, "drop")) - action = ACTION_DROP; + action = LISP_DROP; else { clib_warning ("invalid action: '%s'", s); @@ -1811,7 +1853,7 @@ vnet_lisp_add_del_locator_set (vnet_lisp_add_del_locator_set_args_t * a, clib_error_t * vnet_lisp_enable_disable (u8 is_enable) { - u32 vni, table_id; + u32 vni, dp_table; clib_error_t * error = 0; lisp_cp_main_t * lcm = vnet_lisp_cp_get_main (); vnet_lisp_gpe_enable_disable_args_t _a, * a = &_a; @@ -1826,9 +1868,13 @@ vnet_lisp_enable_disable (u8 is_enable) if (is_enable) { - /* enable all ifaces */ - hash_foreach(vni, table_id, lcm->table_id_by_vni, ({ - dp_add_del_iface(lcm, vni, 1); + /* enable all l2 and l3 ifaces */ + hash_foreach(vni, dp_table, lcm->table_id_by_vni, ({ + dp_add_del_iface(lcm, vni, 0, 1); + })); + + hash_foreach(vni, dp_table, lcm->bd_id_by_vni, ({ + 
dp_add_del_iface(lcm, vni, /* is_l2 */ 1, 1); })); } else @@ -2577,7 +2623,7 @@ send_encapsulated_map_request (vlib_main_t * vm, lisp_cp_main_t *lcm, } static void -get_src_and_dst (void *hdr, ip_address_t * src, ip_address_t *dst) +get_src_and_dst_ip (void *hdr, ip_address_t * src, ip_address_t *dst) { ip4_header_t * ip4 = hdr; ip6_header_t * ip6; @@ -2596,11 +2642,11 @@ get_src_and_dst (void *hdr, ip_address_t * src, ip_address_t *dst) } static u32 -lisp_get_vni_from_buffer (vlib_buffer_t * b, u8 version) +lisp_get_vni_from_buffer_ip (lisp_cp_main_t * lcm, vlib_buffer_t * b, + u8 version) { uword * vnip; u32 vni = ~0, table_id = ~0, fib_index; - lisp_cp_main_t * lcm = vnet_lisp_cp_get_main (); if (version == IP4) { @@ -2632,6 +2678,79 @@ lisp_get_vni_from_buffer (vlib_buffer_t * b, u8 version) return vni; } +always_inline u32 +lisp_get_vni_from_buffer_eth (lisp_cp_main_t * lcm, vlib_buffer_t * b) +{ + uword * vnip; + u32 vni = ~0; + u32 sw_if_index0; + + l2input_main_t * l2im = &l2input_main; + l2_input_config_t * config; + l2_bridge_domain_t * bd_config; + + sw_if_index0 = vnet_buffer(b)->sw_if_index[VLIB_RX]; + config = vec_elt_at_index(l2im->configs, sw_if_index0); + bd_config = vec_elt_at_index (l2im->bd_configs, config->bd_index); + + vnip = hash_get (lcm->vni_by_bd_id, bd_config->bd_id); + if (vnip) + vni = vnip[0]; + else + clib_warning("bridge domain %d is not mapped to any vni!", + config->bd_index); + + return vni; +} + +always_inline void +get_src_and_dst_eids_from_buffer (lisp_cp_main_t *lcm, vlib_buffer_t * b, + gid_address_t * src, gid_address_t * dst) +{ + u32 vni = 0; + u16 type; + + type = vnet_buffer(b)->lisp.overlay_afi; + + if (LISP_AFI_IP == type || LISP_AFI_IP6 == type) + { + ip4_header_t * ip; + u8 version, preflen; + + gid_address_type(src) = GID_ADDR_IP_PREFIX; + gid_address_type(dst) = GID_ADDR_IP_PREFIX; + + ip = vlib_buffer_get_current (b); + get_src_and_dst_ip (ip, &gid_address_ip(src), &gid_address_ip(dst)); + + version = 
gid_address_ip_version(src); + preflen = ip_address_max_len (version); + gid_address_ippref_len(src) = preflen; + gid_address_ippref_len(dst) = preflen; + + vni = lisp_get_vni_from_buffer_ip (lcm, b, version); + gid_address_vni (dst) = vni; + gid_address_vni (src) = vni; + } + else if (LISP_AFI_MAC == type) + { + ethernet_header_t * eh; + + eh = vlib_buffer_get_current (b); + + gid_address_type(src) = GID_ADDR_MAC; + gid_address_type(dst) = GID_ADDR_MAC; + mac_copy(&gid_address_mac(src), eh->src_address); + mac_copy(&gid_address_mac(dst), eh->dst_address); + + /* get vni */ + vni = lisp_get_vni_from_buffer_eth (lcm, b); + + gid_address_vni (dst) = vni; + gid_address_vni (src) = vni; + } +} + static uword lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -2651,16 +2770,9 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left_from > 0 && n_left_to_next_drop > 0) { - u32 pi0, vni; - vlib_buffer_t * p0; - ip4_header_t * ip0; + u32 pi0; + vlib_buffer_t * b0; gid_address_t src, dst; - ip_prefix_t * spref, * dpref; - - gid_address_type (&src) = GID_ADDR_IP_PREFIX; - spref = &gid_address_ippref(&src); - gid_address_type (&dst) = GID_ADDR_IP_PREFIX; - dpref = &gid_address_ippref(&dst); pi0 = from[0]; from += 1; @@ -2669,18 +2781,11 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, to_next_drop += 1; n_left_to_next_drop -= 1; - p0 = vlib_get_buffer (vm, pi0); - p0->error = node->errors[LISP_CP_LOOKUP_ERROR_DROP]; + b0 = vlib_get_buffer (vm, pi0); + b0->error = node->errors[LISP_CP_LOOKUP_ERROR_DROP]; /* src/dst eid pair */ - ip0 = vlib_buffer_get_current (p0); - get_src_and_dst (ip0, &ip_prefix_addr(spref), &ip_prefix_addr(dpref)); - ip_prefix_len(spref) = ip_address_max_len (ip_prefix_version(spref)); - ip_prefix_len(dpref) = ip_address_max_len (ip_prefix_version(dpref)); - - vni = lisp_get_vni_from_buffer (p0, ip_prefix_version (spref)); - gid_address_vni (&dst) = vni; - gid_address_vni (&src) 
= vni; + get_src_and_dst_eids_from_buffer (lcm, b0, &src, &dst); /* if we have remote mapping for destination already in map-chache add forwarding tunnel directly. If not send a map-request */ @@ -2690,7 +2795,7 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, mapping_t * m = vec_elt_at_index (lcm->mapping_pool, di); /* send a map-request also in case of negative mapping entry with corresponding action */ - if (m->action == ACTION_SEND_MAP_REQUEST) + if (m->action == LISP_SEND_MAP_REQUEST) { /* send map-request */ send_encapsulated_map_request (vm, lcm, &src, &dst, 0); @@ -2713,9 +2818,9 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, pkts_mapped++; } - if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { - lisp_cp_lookup_trace_t *tr = vlib_add_trace (vm, node, p0, + lisp_cp_lookup_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr)); memset(tr, 0, sizeof(*tr)); diff --git a/vnet/vnet/lisp-cp/control.h b/vnet/vnet/lisp-cp/control.h index 4d5006e530e..74ade39cc58 100644 --- a/vnet/vnet/lisp-cp/control.h +++ b/vnet/vnet/lisp-cp/control.h @@ -96,14 +96,17 @@ typedef struct /* map-request locator set index */ u32 mreq_itr_rlocs; - /* Lookup vrf by vni */ + /* vni to vrf hash tables */ uword * table_id_by_vni; - - /* Lookup vni by vrf */ uword * vni_by_table_id; - /* Number of src prefixes in a vni that use an interface */ + /* vni to bd-index hash tables */ + uword * bd_id_by_vni; + uword * vni_by_bd_id; + + /* track l2 and l3 interfaces that have been created for vni */ uword * dp_intf_by_vni; + uword * l2_dp_intf_by_vni; /* Proxy ETR map index */ u32 pitr_map_index; @@ -216,6 +219,6 @@ int vnet_lisp_clear_all_remote_adjacencies (void); int -vnet_lisp_eid_table_map (u32 vni, u32 vrf, u8 is_add); +vnet_lisp_eid_table_map (u32 vni, u32 vrf, u8 is_l2, u8 is_add); #endif /* VNET_CONTROL_H_ */ diff --git a/vnet/vnet/lisp-cp/gid_dictionary.c b/vnet/vnet/lisp-cp/gid_dictionary.c 
index 752e1d3c5f4..a6699b99f16 100644 --- a/vnet/vnet/lisp-cp/gid_dictionary.c +++ b/vnet/vnet/lisp-cp/gid_dictionary.c @@ -21,7 +21,7 @@ mac_lookup (gid_dictionary_t * db, u32 vni, u8 * key) int rv; BVT(clib_bihash_kv) kv, value; - kv.key[0] = ((u64 *)key)[0]; + kv.key[0] = ((u64 *)key)[0] & MAC_BIT_MASK; kv.key[1] = (u64)vni; kv.key[2] = 0; diff --git a/vnet/vnet/lisp-cp/lisp_cp_messages.h b/vnet/vnet/lisp-cp/lisp_cp_messages.h index 69f6baa3ad8..8ed1b6c0bd2 100644 --- a/vnet/vnet/lisp-cp/lisp_cp_messages.h +++ b/vnet/vnet/lisp-cp/lisp_cp_messages.h @@ -214,12 +214,6 @@ char *map_request_hdr_to_char(map_request_hdr_t *h); * MAP-REPLY MESSAGE */ - /* Map Reply action codes */ - #define LISP_ACTION_NO_ACTION 0 - #define LISP_ACTION_FORWARD 1 - #define LISP_ACTION_DROP 2 - #define LISP_ACTION_SEND_MAP_REQUEST 3 - /* * Map-Reply Message Format * @@ -418,10 +412,10 @@ void mapping_record_init_hdr(mapping_record_hdr_t *h); typedef enum { - ACTION_NONE, - ACTION_NATIVELY_FORWARDED, - ACTION_SEND_MAP_REQUEST, - ACTION_DROP + LISP_NO_ACTION, + LISP_FORWARD_NATIVE, + LISP_SEND_MAP_REQUEST, + LISP_DROP } lisp_action_e; typedef enum lisp_authoritative diff --git a/vnet/vnet/lisp-cp/lisp_msg_serdes.c b/vnet/vnet/lisp-cp/lisp_msg_serdes.c index c1713b36c99..d7b734e9679 100644 --- a/vnet/vnet/lisp-cp/lisp_msg_serdes.c +++ b/vnet/vnet/lisp-cp/lisp_msg_serdes.c @@ -114,11 +114,21 @@ lisp_msg_push_ecm (vlib_main_t * vm, vlib_buffer_t *b, int lp, int rp, gid_address_t *la, gid_address_t *ra) { ecm_hdr_t *h; - ASSERT(gid_address_type(la) == GID_ADDR_IP_PREFIX); + ip_address_t _src_ip, * src_ip = &_src_ip, _dst_ip, * dst_ip = &_dst_ip; + if (gid_address_type(la) != GID_ADDR_IP_PREFIX) + { + /* empty ip4 */ + memset(src_ip, 0, sizeof(src_ip[0])); + memset(dst_ip, 0, sizeof(dst_ip[0])); + } + else + { + src_ip = &gid_address_ip(la); + dst_ip = &gid_address_ip(ra); + } /* Push inner ip and udp */ - pkt_push_udp_and_ip (vm, b, lp, rp, &gid_address_ip(la), - &gid_address_ip(ra)); + 
pkt_push_udp_and_ip (vm, b, lp, rp, src_ip, dst_ip); /* Push lisp ecm hdr */ h = pkt_push_ecm_hdr (b); diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c index 753b22933ec..f0926594c2b 100644 --- a/vnet/vnet/lisp-cp/lisp_types.c +++ b/vnet/vnet/lisp-cp/lisp_types.c @@ -241,17 +241,17 @@ unformat_negative_mapping_action (unformat_input_t * input, va_list * args) int len = vec_len(s); clib_warning ("len = %d", len); if (!strcmp ((char *) s, "no-action")) - action[0] = ACTION_NONE; + action[0] = LISP_NO_ACTION; if (!strcmp ((char *) s, "natively-forward")) - action[0] = ACTION_NATIVELY_FORWARDED; + action[0] = LISP_FORWARD_NATIVE; if (!strcmp ((char *) s, "send-map-request")) - action[0] = ACTION_SEND_MAP_REQUEST; + action[0] = LISP_SEND_MAP_REQUEST; else if (!strcmp ((char *) s, "drop")) - action[0] = ACTION_DROP; + action[0] = LISP_DROP; else { clib_warning("invalid action: '%s'", s); - action[0] = ACTION_DROP; + action[0] = LISP_DROP; return 0; } } @@ -907,7 +907,7 @@ mac_parse (u8 * offset, gid_address_t * a) /* skip AFI field */ offset += sizeof (u16); - memcpy (gid_address_mac (a), offset, sizeof (gid_address_mac (a))); + clib_memcpy (gid_address_mac (a), offset, sizeof (gid_address_mac (a))); return (sizeof (u16) + sizeof (gid_address_mac (a))); } diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h index e3a0eb799b5..4d4a6b1112c 100644 --- a/vnet/vnet/lisp-cp/lisp_types.h +++ b/vnet/vnet/lisp-cp/lisp_types.h @@ -156,8 +156,8 @@ typedef enum { LISP_AFI_NO_ADDR, LISP_AFI_IP, LISP_AFI_IP6, - LISP_AFI_MAC = 6, - LISP_AFI_LCAF = 16387 + LISP_AFI_LCAF = 16387, + LISP_AFI_MAC = 16389 } lisp_afi_e; u8 *format_gid_address (u8 * s, va_list * args); @@ -202,6 +202,8 @@ void _n ## _copy (void * dst , void * src); foreach_gid_address_type_fcns #undef _ +#define MAC_BIT_MASK (((u64)1 << 48) - 1) + typedef struct { /* mark locator as local as opposed to remote */ diff --git a/vnet/vnet/lisp-cp/packets.h 
b/vnet/vnet/lisp-cp/packets.h index d8966522700..fcb9f38975c 100644 --- a/vnet/vnet/lisp-cp/packets.h +++ b/vnet/vnet/lisp-cp/packets.h @@ -66,6 +66,9 @@ vlib_buffer_make_headroom (vlib_buffer_t *b, u8 size) always_inline void * vlib_buffer_pull (vlib_buffer_t * b, u8 size) { + if (b->current_length < size) + return 0; + void * data = vlib_buffer_get_current (b); vlib_buffer_advance (b, size); return data; diff --git a/vnet/vnet/lisp-gpe/decap.c b/vnet/vnet/lisp-gpe/decap.c index 5a9ce84a407..132262ebd09 100644 --- a/vnet/vnet/lisp-gpe/decap.c +++ b/vnet/vnet/lisp-gpe/decap.c @@ -52,17 +52,18 @@ next_proto_to_next_index[LISP_GPE_NEXT_PROTOS] = { LISP_GPE_INPUT_NEXT_DROP, LISP_GPE_INPUT_NEXT_IP4_INPUT, LISP_GPE_INPUT_NEXT_IP6_INPUT, + LISP_GPE_INPUT_NEXT_L2_INPUT, LISP_GPE_INPUT_NEXT_DROP }; -static u32 +always_inline u32 next_protocol_to_next_index (lisp_gpe_header_t * lgh, u8 * next_header) { /* lisp-gpe router */ if (PREDICT_TRUE((lgh->flags & LISP_GPE_FLAGS_P) && lgh->next_protocol < LISP_GPE_NEXT_PROTOS)) return next_proto_to_next_index[lgh->next_protocol]; - /* legay lisp router */ + /* legacy lisp router */ else if ((lgh->flags & LISP_GPE_FLAGS_P) == 0) { ip4_header_t * iph = (ip4_header_t *) next_header; @@ -77,6 +78,18 @@ next_protocol_to_next_index (lisp_gpe_header_t * lgh, u8 * next_header) return LISP_GPE_INPUT_NEXT_DROP; } +always_inline tunnel_lookup_t * +next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index) +{ + if (LISP_GPE_INPUT_NEXT_IP4_INPUT == next_index + || LISP_GPE_INPUT_NEXT_IP6_INPUT == next_index) + return &lgm->l3_ifaces; + else if (LISP_GPE_INPUT_NEXT_L2_INPUT == next_index) + return &lgm->l2_ifaces; + clib_warning("next_index not associated to an interface!"); + return 0; +} + static_always_inline void incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length, u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes) @@ -110,7 +123,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 
n_left_from, next_index, * from, * to_next, cpu_index; u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0; - lisp_gpe_main_t * lgm = &lisp_gpe_main; + lisp_gpe_main_t * lgm = vnet_lisp_gpe_get_main (); cpu_index = os_get_cpu_number(); from = vlib_frame_vector_args (from_frame); @@ -133,6 +146,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, lisp_gpe_header_t * lh0, * lh1; u32 next0, next1, error0, error1; uword * si0, * si1; + tunnel_lookup_t * tl0, * tl1; /* Prefetch next iteration. */ { @@ -202,17 +216,22 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next1 = next_protocol_to_next_index (lh1, vlib_buffer_get_current (b1)); - /* Required to make the l2 tag push / pop code work on l2 subifs */ - vnet_update_l2_len (b0); - vnet_update_l2_len (b1); + /* determine if tunnel is l2 or l3 */ + tl0 = next_index_to_iface(lgm, next0); + tl1 = next_index_to_iface(lgm, next1); /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to * decide the rx vrf and the input features to be applied */ - si0 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, + si0 = hash_get(tl0->sw_if_index_by_vni, clib_net_to_host_u32 (lh0->iid)); - si1 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, + si1 = hash_get(tl1->sw_if_index_by_vni, clib_net_to_host_u32 (lh1->iid)); + + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + vnet_update_l2_len (b1); + if (si0) { incr_decap_stats (lgm->vnet_main, cpu_index, @@ -279,6 +298,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, lisp_gpe_header_t * lh0; u32 error0; uword * si0; + tunnel_lookup_t * tl0; bi0 = from[0]; to_next[0] = bi0; @@ -328,14 +348,17 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next0 = next_protocol_to_next_index (lh0, vlib_buffer_get_current (b0)); - /* Required to make the l2 tag push / pop code work on l2 subifs */ - vnet_update_l2_len (b0); + /* determine if tunnel 
is l2 or l3 */ + tl0 = next_index_to_iface(lgm, next0); /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to * decide the rx vrf and the input features to be applied */ - si0 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, + si0 = hash_get(tl0->sw_if_index_by_vni, clib_net_to_host_u32 (lh0->iid)); + /* Required to make the l2 tag push / pop code work on l2 subifs */ + vnet_update_l2_len (b0); + if (si0) { incr_decap_stats (lgm->vnet_main, cpu_index, diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c index 566fe7734ca..1f40ea86bbf 100644 --- a/vnet/vnet/lisp-gpe/interface.c +++ b/vnet/vnet/lisp-gpe/interface.c @@ -82,13 +82,15 @@ encap_one_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, { ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 36, 1); next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; - } else { ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 56, 0); next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; } + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; } always_inline void @@ -144,6 +146,10 @@ encap_two_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, ip_udp_encap_one (lgm->vlib_main, b1, t1->rewrite, 56, 0); next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; } + + /* Reset to look up tunnel partner in the configured FIB */ + vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; } #define is_v4_packet(_h) ((*(u8*) _h) & 0xF0) == 0x40 @@ -231,10 +237,6 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, ip_addr_version(&t1->dst) == IP4 ? 
1 : 0); } - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; - vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, @@ -275,9 +277,6 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, encap_one_inline (lgm, b0, t0, &next0, ip_addr_version(&t0->dst) == IP4 ? 1 : 0); - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, @@ -474,24 +473,380 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id, u8 is_ip4) } } -int -vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, - u32 * hw_if_indexp) +#define foreach_l2_lisp_gpe_tx_next \ + _(DROP, "error-drop") \ + _(IP4_LOOKUP, "ip4-lookup") \ + _(IP6_LOOKUP, "ip6-lookup") \ + _(LISP_CP_LOOKUP, "lisp-cp-lookup") + +typedef enum +{ +#define _(sym,str) L2_LISP_GPE_TX_NEXT_##sym, + foreach_l2_lisp_gpe_tx_next +#undef _ + L2_LISP_GPE_TX_N_NEXT, +} l2_lisp_gpe_tx_next_t; + +typedef struct +{ + u32 tunnel_index; +} l2_lisp_gpe_tx_trace_t; + +u8 * +format_l2_lisp_gpe_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2_lisp_gpe_tx_trace_t * t = va_arg (*args, l2_lisp_gpe_tx_trace_t *); + + s = format (s, "L2-LISP-GPE-TX: tunnel %d", t->tunnel_index); + return s; +} + +always_inline void +l2_process_tunnel_action (vlib_buffer_t * b0, u8 action, u32 * next0) { + if (LISP_SEND_MAP_REQUEST == action) + { + next0[0] = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b0)->lisp.overlay_afi = LISP_AFI_MAC; + } + else + { + next0[0] = L2_LISP_GPE_TX_NEXT_DROP; + } +} + +always_inline void +l2_process_one 
(lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, u32 * next0) +{ + lisp_gpe_tunnel_t * t0; + + t0 = pool_elt_at_index(lgm->tunnels, ti0); + ASSERT(0 != t0); + + if (PREDICT_TRUE(LISP_NO_ACTION == t0->action)) + { + encap_one_inline (lgm, b0, t0, next0, + ip_addr_version(&t0->dst) == IP4 ? 1 : 0); + } + else + { + l2_process_tunnel_action(b0, t0->action, next0); + } +} + +always_inline void +l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u32 ti0, u32 ti1, u32 * next0, u32 * next1) +{ + lisp_gpe_tunnel_t * t0, * t1; + + t0 = pool_elt_at_index(lgm->tunnels, ti0); + t1 = pool_elt_at_index(lgm->tunnels, ti1); + + ASSERT(0 != t0 && 0 != t1); + + if (PREDICT_TRUE(LISP_NO_ACTION == t0->action + && LISP_NO_ACTION == t1->action)) + { + encap_two_inline (lgm, b0, b1, t0, t1, next0, next1, + ip_addr_version(&t0->dst) == IP4 ? 1 : 0); + } + else + { + if (LISP_NO_ACTION == t0->action) + { + encap_one_inline (lgm, b0, t0, next0, + ip_addr_version(&t0->dst) == IP4 ? 1 : 0); + l2_process_tunnel_action (b1, t1->action, next1); + } + else if (LISP_NO_ACTION == t1->action) + { + encap_one_inline (lgm, b1, t1, next1, + ip_addr_version(&t1->dst) == IP4 ? 
1 : 0); + l2_process_tunnel_action (b0, t0->action, next0); + } + else + { + l2_process_tunnel_action (b0, t0->action, next0); + l2_process_tunnel_action (b1, t1->action, next1); + } + } +} + +/* TX function of the L2 LISP-GPE interface: each frame is looked up in the + * L2 LISP FIB by (bd_index, src mac, dst mac); hits are handed to the tunnel + * processing helpers, misses go to the LISP_CP_LOOKUP next with + * overlay_afi set to LISP_AFI_MAC. */ +static uword +l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; lisp_gpe_main_t * lgm = &lisp_gpe_main; - vnet_main_t * vnm = lgm->vnet_main; - vnet_hw_interface_t * hi; - u32 hw_if_index = ~0, lookup_next_index4, lookup_next_index6, flen; - uword * hip, * vni, * si; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1, ti0, ti1; + ethernet_header_t * e0, * e1; + + next0 = next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header(p2, LOAD); + vlib_prefetch_buffer_header(p3, LOAD); + + CLIB_PREFETCH(p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH(p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + e0 = vlib_buffer_get_current (b0); + e1 = vlib_buffer_get_current (b1); + + /* lookup dst + src mac */ + ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer(b0)->l2.bd_index, + e0->src_address, e0->dst_address); + ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer(b1)->l2.bd_index, + e1->src_address, e1->dst_address); + + if (PREDICT_TRUE((u32)~0 != ti0 && (u32)~0 != ti1)) + { + /* process both tunnels */ + l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1); + } + else + { + if ((u32)~0 != ti0) + { + /* process tunnel for b0 */ + l2_process_one (lgm, b0, ti0, &next0); + + /* no tunnel found for b1, send to control plane */ + next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b1)->lisp.overlay_afi = LISP_AFI_MAC; + } + else if ((u32)~0 != ti1) + { + /* process tunnel for b1 */ + l2_process_one (lgm, b1, ti1, &next1); + + /* no tunnel found b0, send to control plane */ + next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b0)->lisp.overlay_afi = LISP_AFI_MAC; + } + else + { + /* no tunnels found */ + next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b0)->lisp.overlay_afi = LISP_AFI_MAC; + next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b1)->lisp.overlay_afi = LISP_AFI_MAC; + } + } + + /* trace the looked-up tunnel index; it is ~0 when no tunnel matched */ + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof(*tr)); + tr->tunnel_index = ti0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { 
+ l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b1, + sizeof(*tr)); + tr->tunnel_index = ti1; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, ti0, next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + ethernet_header_t * e0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + e0 = vlib_buffer_get_current (b0); + + /* lookup dst + src mac */ + ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer(b0)->l2.bd_index, + e0->src_address, e0->dst_address); + + if (PREDICT_TRUE((u32)~0 != ti0)) + { + l2_process_one (lgm, b0, ti0, &next0); + } + else + { + /* no tunnel found send to control plane */ + next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + vnet_buffer(b0)->lisp.overlay_afi = LISP_AFI_MAC; + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof(*tr)); + tr->tunnel_index = 
ti0; + } + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static u8 * +format_l2_lisp_gpe_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "l2_lisp_gpe%d", dev_instance); +} + +VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = { + .name = "L2_LISP_GPE", + .format_device_name = format_l2_lisp_gpe_name, + .format_tx_trace = format_lisp_gpe_tx_trace, + .tx_function = l2_lisp_gpe_interface_tx, + .no_flatten_output_chains = 1, +}; + + +static vnet_hw_interface_t * +create_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table, + vnet_device_class_t * dev_class, + tunnel_lookup_t * tuns) +{ + u32 flen; + u32 hw_if_index = ~0; u8 * new_name; + vnet_hw_interface_t * hi; + vnet_main_t * vnm = lgm->vnet_main; - if (vnet_lisp_gpe_enable_disable_status() == 0) + /* create hw lisp_gpeX iface if needed, otherwise reuse existing */ + flen = vec_len(lgm->free_tunnel_hw_if_indices); + if (flen > 0) { - clib_warning ("LISP is disabled!"); - return VNET_API_ERROR_LISP_DISABLED; + hw_if_index = lgm->free_tunnel_hw_if_indices[flen - 1]; + _vec_len(lgm->free_tunnel_hw_if_indices) -= 1; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* rename interface */ + new_name = format (0, "%U", dev_class->format_device_name, + vni); + + vec_add1(new_name, 0); + vnet_rename_interface (vnm, hw_if_index, (char *) new_name); + vec_free(new_name); + + /* clear old stats of freed interface before reuse */ + vnet_interface_main_t * im = &vnm->interface_main; + vnet_interface_counter_lock (im); + vlib_zero_combined_counter ( + &im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], + hi->sw_if_index); + vlib_zero_combined_counter ( + &im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], + hi->sw_if_index); + vlib_zero_simple_counter ( + 
&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], + hi->sw_if_index); + vnet_interface_counter_unlock (im); } + else + { + hw_if_index = vnet_register_interface (vnm, dev_class->index, vni, + lisp_gpe_hw_class.index, 0); + hi = vnet_get_hw_interface (vnm, hw_if_index); + } + + hash_set(tuns->hw_if_index_by_dp_table, dp_table, hw_if_index); - hip = hash_get(lgm->lisp_gpe_hw_if_index_by_table_id, a->table_id); + /* set tunnel termination: post decap, packets are tagged as having been + * originated by lisp-gpe interface */ + hash_set(tuns->sw_if_index_by_vni, vni, hi->sw_if_index); + hash_set(tuns->vni_by_sw_if_index, hi->sw_if_index, vni); + + return hi; +} + +static void +remove_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table, + tunnel_lookup_t * tuns) +{ + vnet_main_t * vnm = lgm->vnet_main; + vnet_hw_interface_t * hi; + uword * vnip; + + hi = vnet_get_hw_interface (vnm, hi_index); + + /* disable interface */ + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0/* down */); + vnet_hw_interface_set_flags (vnm, hi->hw_if_index, 0/* down */); + hash_unset(tuns->hw_if_index_by_dp_table, dp_table); + vec_add1(lgm->free_tunnel_hw_if_indices, hi->hw_if_index); + + /* clean tunnel termination and vni to sw_if_index binding */ + vnip = hash_get(tuns->vni_by_sw_if_index, hi->sw_if_index); + hash_unset(tuns->sw_if_index_by_vni, vnip[0]); + hash_unset(tuns->vni_by_sw_if_index, hi->sw_if_index); +} + +static int +lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_iface_args_t * a) +{ + vnet_main_t * vnm = lgm->vnet_main; + tunnel_lookup_t * l3_ifaces = &lgm->l3_ifaces; + vnet_hw_interface_t * hi; + u32 lookup_next_index4, lookup_next_index6; + uword * hip, * si; + + hip = hash_get(l3_ifaces->hw_if_index_by_dp_table, a->table_id); if (a->is_add) { @@ -501,58 +856,16 @@ vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, return -1; } - si = hash_get(lgm->tunnel_term_sw_if_index_by_vni, a->vni); + si = 
hash_get(l3_ifaces->sw_if_index_by_vni, a->vni); if (si) { clib_warning ("Interface for vni %d already exists", a->vni); return -1; } - /* create hw lisp_gpeX iface if needed, otherwise reuse existing */ - flen = vec_len(lgm->free_lisp_gpe_tunnel_hw_if_indices); - if (flen > 0) - { - hw_if_index = lgm->free_lisp_gpe_tunnel_hw_if_indices[flen - 1]; - _vec_len(lgm->free_lisp_gpe_tunnel_hw_if_indices) -= 1; - - hi = vnet_get_hw_interface (vnm, hw_if_index); - - /* rename interface */ - new_name = format (0, "%U", lisp_gpe_device_class.format_device_name, - a->vni); - vec_add1(new_name, 0); - vnet_rename_interface (vnm, hw_if_index, (char *) new_name); - vec_free(new_name); - - /* clear old stats of freed interface before reuse */ - vnet_interface_main_t * im = &vnm->interface_main; - vnet_interface_counter_lock (im); - vlib_zero_combined_counter ( - &im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], - hi->sw_if_index); - vlib_zero_combined_counter ( - &im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], - hi->sw_if_index); - vlib_zero_simple_counter ( - &im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], - hi->sw_if_index); - vnet_interface_counter_unlock (im); - } - else - { - hw_if_index = vnet_register_interface (vnm, - lisp_gpe_device_class.index, - a->vni, - lisp_gpe_hw_class.index, 0); - hi = vnet_get_hw_interface (vnm, hw_if_index); - } - - hash_set(lgm->lisp_gpe_hw_if_index_by_table_id, a->table_id, hw_if_index); - - /* set tunnel termination: post decap, packets are tagged as having been - * originated by lisp-gpe interface */ - hash_set(lgm->tunnel_term_sw_if_index_by_vni, a->vni, hi->sw_if_index); - hash_set(lgm->vni_by_tunnel_term_sw_if_index, hi->sw_if_index, a->vni); + /* create lisp iface and populate tunnel tables */ + hi = create_lisp_gpe_iface (lgm, a->vni, a->table_id, + &lisp_gpe_device_class, l3_ifaces); /* set ingress arc from lgpe_ipX_lookup */ lookup_next_index4 = vlib_node_add_next (lgm->vlib_main, @@ -593,18 +906,8 @@ 
vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, clib_warning("The interface for vrf %d doesn't exist", a->table_id); return -1; } - hi = vnet_get_hw_interface (vnm, hip[0]); - - /* disable interface */ - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, 0/* down */); - vnet_hw_interface_set_flags (vnm, hi->hw_if_index, 0/* down */); - hash_unset(lgm->lisp_gpe_hw_if_index_by_table_id, a->table_id); - vec_add1(lgm->free_lisp_gpe_tunnel_hw_if_indices, hi->hw_if_index); - /* clean tunnel termination and vni to sw_if_index binding */ - vni = hash_get(lgm->vni_by_tunnel_term_sw_if_index, hi->sw_if_index); - hash_unset(lgm->tunnel_term_sw_if_index_by_vni, vni[0]); - hash_unset(lgm->vni_by_tunnel_term_sw_if_index, hi->sw_if_index); + remove_lisp_gpe_iface (lgm, hip[0], a->table_id, &lgm->l3_ifaces); /* unset default routes */ add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */1, 0); @@ -614,6 +917,86 @@ vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, return 0; } +static int +lisp_gpe_add_del_l2_iface (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_iface_args_t * a) +{ + vnet_main_t * vnm = lgm->vnet_main; + tunnel_lookup_t * l2_ifaces = &lgm->l2_ifaces; + vnet_hw_interface_t * hi; + uword * hip, * si; + u16 bd_index; + + bd_index = bd_find_or_add_bd_index(&bd_main, a->bd_id); + hip = hash_get(l2_ifaces->hw_if_index_by_dp_table, bd_index); + + if (a->is_add) + { + if (hip) + { + clib_warning("bridge domain %d already mapped to a vni", a->bd_id); + return -1; + } + + si = hash_get(l2_ifaces->sw_if_index_by_vni, a->vni); + if (si) + { + clib_warning ("Interface for vni %d already exists", a->vni); + return -1; + } + + /* create lisp iface and populate tunnel tables */ + hi = create_lisp_gpe_iface (lgm, a->vni, bd_index, + &l2_lisp_gpe_device_class, &lgm->l2_ifaces); + + /* add iface to l2 bridge domain */ + set_int_l2_mode (lgm->vlib_main, vnm, MODE_L2_BRIDGE, hi->sw_if_index, + bd_index, 0, 0, 0); + + /* set egress arcs */ 
+#define _(sym,str) vlib_node_add_named_next_with_slot (vnm->vlib_main, \ + hi->tx_node_index, str, L2_LISP_GPE_TX_NEXT_##sym); + foreach_l2_lisp_gpe_tx_next +#undef _ + + /* enable interface */ + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vnet_hw_interface_set_flags (vnm, hi->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + else + { + if (hip == 0) + { + clib_warning("The interface for bridge domain %d doesn't exist", + a->bd_id); + return -1; + } + remove_lisp_gpe_iface (lgm, hip[0], bd_index, &lgm->l2_ifaces); + } + + return 0; +} + +int +vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, + u32 * hw_if_indexp) +{ + lisp_gpe_main_t * lgm = &lisp_gpe_main; + + if (vnet_lisp_gpe_enable_disable_status() == 0) + { + clib_warning ("LISP is disabled!"); + return VNET_API_ERROR_LISP_DISABLED; + } + + if (!a->is_l2) + return lisp_gpe_add_del_l3_iface (lgm, a); + else + return lisp_gpe_add_del_l2_iface (lgm, a); +} + static clib_error_t * lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -622,8 +1005,8 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 is_add = 1; clib_error_t * error = 0; int rv = 0; - u32 table_id, vni; - u8 vni_is_set = 0, vrf_is_set = 0; + u32 table_id, vni, bd_id; + u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0; vnet_lisp_gpe_add_del_iface_args_t _a, * a = &_a; @@ -645,6 +1028,10 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, { vni_is_set = 1; } + else if (unformat (line_input, "bd %d", &bd_id)) + { + bd_index_is_set = 1; + } else { return clib_error_return (0, "parse error: '%U'", @@ -652,14 +1039,20 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } } - if (!vni_is_set || !vrf_is_set) - { - return clib_error_return (0, "Both vni and vrf must be set!"); - } + if (vrf_is_set && bd_index_is_set) + return 
clib_error_return(0, "Cannot set both vrf and bridge domain index!"); + + if (!vni_is_set) + return clib_error_return(0, "vni must be set!"); + + if (!vrf_is_set && !bd_index_is_set) + return clib_error_return(0, "vrf or bridge domain index must be set!"); a->is_add = is_add; - a->table_id = table_id; + a->dp_table = vrf_is_set ? table_id : bd_id; a->vni = vni; + a->is_l2 = bd_index_is_set; + rv = vnet_lisp_gpe_add_del_iface (a, 0); if (0 != rv) { diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c index 9d999faa0e7..a8d53ee3588 100644 --- a/vnet/vnet/lisp-gpe/ip_forward.c +++ b/vnet/vnet/lisp-gpe/ip_forward.c @@ -895,6 +895,12 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->sw_if_index[VLIB_TX] = src_adj0->rewrite_header.sw_if_index; } + else + { + next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; + } + if (src_fib_index1 != (u32) ~0) { ip4_src_fib_lookup_one (lgm, src_fib_index1, @@ -905,6 +911,11 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b1)->sw_if_index[VLIB_TX] = src_adj1->rewrite_header.sw_if_index; } + else + { + next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP; + } } vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, @@ -949,6 +960,11 @@ lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->sw_if_index[VLIB_TX] = src_adj0->rewrite_header.sw_if_index; } + else + { + next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; + } vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -1111,6 +1127,12 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->sw_if_index[VLIB_TX] = src_adj0->rewrite_header.sw_if_index; } + else + { + next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP6; + 
} + if (src_fib_index1 != (u32) ~0) { src_adj_index1 = ip6_src_fib_lookup (lgm, src_fib_index1, @@ -1121,6 +1143,11 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b1)->sw_if_index[VLIB_TX] = src_adj1->rewrite_header.sw_if_index; } + else + { + next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP6; + } } vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, @@ -1166,6 +1193,11 @@ lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->sw_if_index[VLIB_TX] = src_adj0->rewrite_header.sw_if_index; } + else + { + next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP6; + } vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c index c1ae5c42445..812e0aed7ad 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe.c @@ -95,23 +95,31 @@ lisp_gpe_rewrite (lisp_gpe_tunnel_t * t) _(encap_fib_index) \ _(decap_fib_index) \ _(decap_next_index) \ -_(vni) +_(vni) \ +_(action) -static u32 -add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, +static int +add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u8 is_l2, u32 * tun_index_res) { lisp_gpe_main_t * lgm = &lisp_gpe_main; lisp_gpe_tunnel_t *t = 0; + lisp_gpe_tunnel_key_t key; uword * p; int rv; - lisp_gpe_tunnel_key_t key; /* prepare tunnel key */ memset(&key, 0, sizeof(key)); - ip_prefix_copy(&key.eid, &gid_address_ippref(&a->deid)); - ip_address_copy(&key.dst_loc, &a->dlocator); - key.iid = clib_host_to_net_u32 (a->vni); + + /* fill in the key's remote eid */ + if (!is_l2) + ip_prefix_copy (&key.rmt_ippref, &gid_address_ippref(&a->rmt_eid)); + else + mac_copy (&key.rmt_mac, &gid_address_mac(&a->rmt_eid)); + + + ip_address_copy(&key.rmt_loc, &a->rmt_loc); + key.vni = clib_host_to_net_u32 (a->vni); p = mhash_get 
(&lgm->lisp_gpe_tunnel_by_key, &key); @@ -132,20 +140,24 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, foreach_copy_field; #undef _ - ip_address_copy(&t->src, &a->slocator); - ip_address_copy(&t->dst, &a->dlocator); + ip_address_copy(&t->src, &a->lcl_loc); + ip_address_copy(&t->dst, &a->rmt_loc); /* if vni is non-default */ if (a->vni) - { - t->flags = LISP_GPE_FLAGS_I; - t->vni = a->vni; - } + t->flags = LISP_GPE_FLAGS_I; + /* work in lisp-gpe not legacy mode */ t->flags |= LISP_GPE_FLAGS_P; - t->next_protocol = ip_prefix_version(&key.eid) == IP4 ? - LISP_GPE_NEXT_PROTO_IP4 : LISP_GPE_NEXT_PROTO_IP6; + /* next proto */ + if (!is_l2) + t->next_protocol = ip_prefix_version(&key.rmt_ippref) == IP4 ? + LISP_GPE_NEXT_PROTO_IP4 : LISP_GPE_NEXT_PROTO_IP6; + else + t->next_protocol = LISP_GPE_NEXT_PROTO_ETHERNET; + + /* compute rewrite */ rv = lisp_gpe_rewrite (t); if (rv) @@ -166,7 +178,7 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, if (!p) { clib_warning("Tunnel for eid %U doesn't exist!", format_gid_address, - &a->deid); + &a->rmt_eid); return VNET_API_ERROR_NO_SUCH_ENTRY; } @@ -182,12 +194,12 @@ add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, } static int -add_del_negative_fwd_entry (lisp_gpe_main_t * lgm, +add_del_negative_ip_fwd_entry (lisp_gpe_main_t * lgm, vnet_lisp_gpe_add_del_fwd_entry_args_t * a) { ip_adjacency_t adj; - ip_prefix_t * dpref = &gid_address_ippref(&a->deid); - ip_prefix_t * spref = &gid_address_ippref(&a->seid); + ip_prefix_t * dpref = &gid_address_ippref(&a->rmt_eid); + ip_prefix_t * spref = &gid_address_ippref(&a->lcl_eid); /* setup adjacency for eid */ memset (&adj, 0, sizeof(adj)); @@ -203,12 +215,12 @@ add_del_negative_fwd_entry (lisp_gpe_main_t * lgm, switch (a->action) { - case NO_ACTION: + case LISP_NO_ACTION: /* TODO update timers? 
*/ - case FORWARD_NATIVE: + case LISP_FORWARD_NATIVE: /* TODO check if route/next-hop for eid exists in fib and add * more specific for the eid with the next-hop found */ - case SEND_MAP_REQUEST: + case LISP_SEND_MAP_REQUEST: /* insert tunnel that always sends map-request */ adj.explicit_fib_index = (ip_prefix_version(dpref) == IP4) ? LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP: @@ -216,7 +228,7 @@ add_del_negative_fwd_entry (lisp_gpe_main_t * lgm, /* add/delete route for prefix */ return ip_sd_fib_add_del_route (lgm, dpref, spref, a->table_id, &adj, a->is_add); - case DROP: + case LISP_DROP: /* for drop fwd entries, just add route, no need to add encap tunnel */ adj.explicit_fib_index = (ip_prefix_version(dpref) == IP4 ? LGPE_IP4_LOOKUP_NEXT_DROP : LGPE_IP6_LOOKUP_NEXT_DROP); @@ -229,36 +241,29 @@ add_del_negative_fwd_entry (lisp_gpe_main_t * lgm, } } -int -vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, - u32 * hw_if_indexp) +static int +add_del_ip_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) { - lisp_gpe_main_t * lgm = &lisp_gpe_main; ip_adjacency_t adj, * adjp; u32 adj_index, rv, tun_index = ~0; ip_prefix_t * dpref, * spref; uword * lookup_next_index, * lgpe_sw_if_index, * lnip; u8 ip_ver; - if (vnet_lisp_gpe_enable_disable_status() == 0) - { - clib_warning ("LISP is disabled!"); - return VNET_API_ERROR_LISP_DISABLED; - } - /* treat negative fwd entries separately */ if (a->is_negative) - return add_del_negative_fwd_entry (lgm, a); - - dpref = &gid_address_ippref(&a->deid); - spref = &gid_address_ippref(&a->seid); - ip_ver = ip_prefix_version(dpref); + return add_del_negative_ip_fwd_entry (lgm, a); /* add/del tunnel to tunnels pool and prepares rewrite */ - rv = add_del_ip_tunnel (a, &tun_index); + rv = add_del_ip_tunnel (a, 0 /* is_l2 */, &tun_index); if (rv) return rv; + dpref = &gid_address_ippref(&a->rmt_eid); + spref = &gid_address_ippref(&a->lcl_eid); + ip_ver = ip_prefix_version(dpref); + /* setup 
adjacency for eid */ memset (&adj, 0, sizeof(adj)); adj.n_adj = 1; @@ -276,8 +281,7 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, lgm->lgpe_ip4_lookup_next_index_by_table_id : lgm->lgpe_ip6_lookup_next_index_by_table_id; lookup_next_index = hash_get(lnip, a->table_id); - lgpe_sw_if_index = hash_get(lgm->tunnel_term_sw_if_index_by_vni, - a->vni); + lgpe_sw_if_index = hash_get(lgm->l3_ifaces.sw_if_index_by_vni, a->vni); /* the assumption is that the interface must've been created before * programming the dp */ @@ -311,6 +315,122 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, return rv; } +static void +make_mac_fib_key (BVT(clib_bihash_kv) *kv, u16 bd_index, u8 src_mac[6], + u8 dst_mac[6]) +{ + kv->key[0] = (((u64) bd_index) << 48) | (((u64 *)dst_mac)[0] & MAC_BIT_MASK); + kv->key[1] = ((u64 *)src_mac)[0] & MAC_BIT_MASK; + kv->key[2] = 0; +} + +/* Look up (bd_index, src_mac, dst_mac) in the L2 LISP FIB and return the + * stored value (the tunnel index), or ~0 when neither the exact entry nor + * the src-0 catch-all entry for dst_mac exists. */ +u32 +lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], + u8 dst_mac[6]) +{ + int rv; + BVT(clib_bihash_kv) kv, value; + + make_mac_fib_key(&kv, bd_index, src_mac, dst_mac); + rv = BV(clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); + + /* an exact (dst, src) match wins; otherwise retry with src 0, the + * catch-all entry for dst */ + if (rv != 0) + { + kv.key[1] = 0; + rv = BV(clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); + } + + /* rv == 0 means the last search performed found an entry */ + return rv == 0 ? value.value : ~0; +} + +u32 +lisp_l2_fib_add_del_entry (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], + u8 dst_mac[6], u32 val, u8 is_add) +{ + BVT(clib_bihash_kv) kv, value; + u32 old_val = ~0; + + make_mac_fib_key(&kv, bd_index, src_mac, dst_mac); + + if (BV(clib_bihash_search) (&lgm->l2_fib, &kv, &value) == 0) + old_val = value.value; + + if (!is_add) + BV(clib_bihash_add_del) (&lgm->l2_fib, &kv, 0 /* is_add */); + else + { + kv.value = val; + BV(clib_bihash_add_del) (&lgm->l2_fib, &kv, 1 /* is_add */); + } + return old_val; +} + +static void +l2_fib_init (lisp_gpe_main_t * lgm) +{ + 
BV(clib_bihash_init) (&lgm->l2_fib, "l2 fib", + 1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS), + L2_FIB_DEFAULT_HASH_MEMORY_SIZE); +} + +static int +add_del_l2_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + int rv; + u32 tun_index; + bd_main_t * bdm = &bd_main; + uword * bd_indexp; + + /* create tunnel */ + rv = add_del_ip_tunnel (a, 1 /* is_l2 */, &tun_index); + if (rv) + return rv; + + bd_indexp = hash_get(bdm->bd_index_by_bd_id, a->bd_id); + if (!bd_indexp) + { + clib_warning("bridge domain %d doesn't exist", a->bd_id); + return -1; + } + + /* add entry to l2 lisp fib */ + lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac(&a->lcl_eid), + gid_address_mac(&a->rmt_eid), tun_index, + a->is_add); + return 0; +} + + +int +vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, + u32 * hw_if_indexp) +{ + lisp_gpe_main_t * lgm = &lisp_gpe_main; + u8 type; + + if (vnet_lisp_gpe_enable_disable_status () == 0) + { + clib_warning ("LISP is disabled!"); + return VNET_API_ERROR_LISP_DISABLED; + } + + type = gid_address_type(&a->rmt_eid); + switch (type) + { + case GID_ADDR_IP_PREFIX: + return add_del_ip_fwd_entry (lgm, a); + case GID_ADDR_MAC: + return add_del_l2_fwd_entry (lgm, a); + default: + clib_warning("Forwarding entries for type %d not supported!", type); + return -1; + } +} + static clib_error_t * lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -428,13 +548,13 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, a->is_add = is_add; a->vni = vni; a->table_id = vrf; - gid_address_copy(&a->seid, leid); - gid_address_copy(&a->deid, reid); + gid_address_copy(&a->lcl_eid, leid); + gid_address_copy(&a->rmt_eid, reid); if (!is_negative) { - a->slocator = llocs[0]; - a->dlocator = rlocs[0]; + a->lcl_loc = llocs[0]; + a->rmt_loc = rlocs[0]; } rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0); @@ -569,7 +689,7 @@ vnet_lisp_gpe_enable_disable 
(vnet_lisp_gpe_enable_disable_args_t * a) { CLIB_UNUSED(uword * val); hash_pair_t * p; - u32 * table_ids = 0, * table_id; + u32 * dp_tables = 0, * dp_table; lisp_gpe_tunnel_key_t * tunnels = 0, * tunnel; vnet_lisp_gpe_add_del_fwd_entry_args_t _at, * at = &_at; vnet_lisp_gpe_add_del_iface_args_t _ai, * ai= &_ai; @@ -582,26 +702,43 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) vec_foreach(tunnel, tunnels) { memset(at, 0, sizeof(at[0])); at->is_add = 0; - gid_address_type(&at->deid) = GID_ADDR_IP_PREFIX; - ip_prefix_copy(&gid_address_ippref(&at->deid), &tunnel->eid); - ip_address_copy(&at->dlocator, &tunnel->dst_loc); + gid_address_type(&at->rmt_eid) = GID_ADDR_IP_PREFIX; + ip_prefix_copy(&gid_address_ippref(&at->rmt_eid), &tunnel->rmt_ippref); + ip_address_copy(&at->rmt_loc, &tunnel->rmt_loc); vnet_lisp_gpe_add_del_fwd_entry (at, 0); } vec_free(tunnels); - /* disable all ifaces */ - hash_foreach_pair(p, lgm->lisp_gpe_hw_if_index_by_table_id, ({ - vec_add1(table_ids, p->key); + /* disable all l3 ifaces */ + hash_foreach_pair(p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({ + vec_add1(dp_tables, p->key); })); - vec_foreach(table_id, table_ids) { + vec_foreach(dp_table, dp_tables) { ai->is_add = 0; - ai->table_id = table_id[0]; + ai->table_id = dp_table[0]; + ai->is_l2 = 0; /* disables interface and removes defaults */ vnet_lisp_gpe_add_del_iface(ai, 0); } - vec_free(table_ids); + + /* disable all l2 ifaces */ + _vec_len(dp_tables) = 0; + hash_foreach_pair(p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({ + vec_add1(dp_tables, p->key); + })); + + vec_foreach(dp_table, dp_tables) { + ai->is_add = 0; + ai->bd_id = dp_table[0]; + ai->is_l2 = 1; + + /* disables interface and removes defaults */ + vnet_lisp_gpe_add_del_iface(ai, 0); + } + + vec_free(dp_tables); lgm->is_en = 0; } @@ -651,9 +788,17 @@ lisp_show_iface_command_fn (vlib_main_t * vm, hash_pair_t * p; vlib_cli_output (vm, "%=10s%=12s", "vrf", "hw_if_index"); - hash_foreach_pair (p, 
lgm->lisp_gpe_hw_if_index_by_table_id, ({ + hash_foreach_pair (p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({ vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]); })); + + if (0 != lgm->l2_ifaces.hw_if_index_by_dp_table) + { + vlib_cli_output (vm, "%=10s%=12s", "bd_id", "hw_if_index"); + hash_foreach_pair (p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({ + vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]); + })); + } return 0; } @@ -663,6 +808,14 @@ VLIB_CLI_COMMAND (lisp_show_iface_command) = { .function = lisp_show_iface_command_fn, }; + +u8 * +format_vnet_lisp_gpe_status (u8 * s, va_list * args) +{ + lisp_gpe_main_t * lgm = &lisp_gpe_main; + return format (s, "%s", lgm->is_en ? "enabled" : "disabled"); +} + clib_error_t * lisp_gpe_init (vlib_main_t *vm) { @@ -687,6 +840,8 @@ lisp_gpe_init (vlib_main_t *vm) mhash_init (&lgm->lisp_gpe_tunnel_by_key, sizeof(uword), sizeof(lisp_gpe_tunnel_key_t)); + l2_fib_init (lgm); + udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */); udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe6, @@ -694,11 +849,4 @@ lisp_gpe_init (vlib_main_t *vm) return 0; } -u8 * -format_vnet_lisp_gpe_status (u8 * s, va_list * args) -{ - lisp_gpe_main_t * lgm = &lisp_gpe_main; - return format (s, "%s", lgm->is_en ? 
"enabled" : "disabled"); -} - VLIB_INIT_FUNCTION(lisp_gpe_init); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h index 705b64f610b..fd7a1143cbd 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe.h @@ -27,6 +27,7 @@ #include #include +/* encap headers */ typedef CLIB_PACKED (struct { ip4_header_t ip4; /* 20 bytes */ udp_header_t udp; /* 8 bytes */ @@ -45,9 +46,14 @@ typedef struct { struct { - ip_prefix_t eid; /* within the dp only ip and mac can be eids */ - ip_address_t dst_loc; - u32 iid; + /* within the dp only ip and mac can be eids */ + union + { + ip_prefix_t rmt_ippref; + u8 rmt_mac[6]; + }; + ip_address_t rmt_loc; + u32 vni; }; u8 as_u8[40]; }; @@ -73,6 +79,9 @@ typedef struct u32 hw_if_index; u32 sw_if_index; + /* action for 'negative' tunnels */ + u8 action; + /* LISP header fields in HOST byte order */ u8 flags; u8 ver_res; @@ -85,7 +94,7 @@ typedef struct _(DROP, "error-drop") \ _(IP4_INPUT, "ip4-input") \ _(IP6_INPUT, "ip6-input") \ -_(ETHERNET_INPUT, "ethernet-input") +_(L2_INPUT, "l2-input") typedef enum { #define _(s,n) LISP_GPE_INPUT_NEXT_##s, @@ -121,27 +130,37 @@ typedef struct ip6_src_fib uword lookup_table_size; } ip6_src_fib_t; -typedef struct lisp_gpe_main +typedef struct tunnel_lookup { - /* Pool of src fibs that are paired with dst fibs */ - ip4_src_fib_t * ip4_src_fibs; - ip6_src_fib_t * ip6_src_fibs; + /* Lookup lisp-gpe interfaces by dp table (eg. 
vrf/bridge index) */ + uword * hw_if_index_by_dp_table; - /* vector of encap tunnel instances */ + /* lookup decap tunnel termination sw_if_index by vni and vice versa */ + uword * sw_if_index_by_vni; + uword * vni_by_sw_if_index; +} tunnel_lookup_t; + +typedef struct lisp_gpe_main +{ + /* pool of encap tunnel instances */ lisp_gpe_tunnel_t * tunnels; /* lookup tunnel by key */ mhash_t lisp_gpe_tunnel_by_key; - /* lookup decap tunnel termination sw_if_index by vni and vice versa */ - uword * tunnel_term_sw_if_index_by_vni; - uword * vni_by_tunnel_term_sw_if_index; - /* Free vlib hw_if_indices */ - u32 * free_lisp_gpe_tunnel_hw_if_indices; + u32 * free_tunnel_hw_if_indices; + + u8 is_en; + + /* L3 data structures + * ================== */ + + /* Pool of src fibs that are paired with dst fibs */ + ip4_src_fib_t * ip4_src_fibs; + ip6_src_fib_t * ip6_src_fibs; - /* Lookup lisp-gpe interfaces by vrf */ - uword * lisp_gpe_hw_if_index_by_table_id; + tunnel_lookup_t l3_ifaces; /* Lookup lgpe_ipX_lookup_next by vrf */ uword * lgpe_ip4_lookup_next_index_by_table_id; @@ -151,6 +170,14 @@ typedef struct lisp_gpe_main u32 ip4_lookup_next_lgpe_ip4_lookup; u32 ip6_lookup_next_lgpe_ip6_lookup; + /* L2 data structures + * ================== */ + + /* l2 lisp fib */ + BVT(clib_bihash) l2_fib; + + tunnel_lookup_t l2_ifaces; + /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; @@ -158,11 +185,15 @@ typedef struct lisp_gpe_main ip6_main_t * im6; ip_lookup_main_t * lm4; ip_lookup_main_t * lm6; - u8 is_en; } lisp_gpe_main_t; lisp_gpe_main_t lisp_gpe_main; +always_inline lisp_gpe_main_t * +vnet_lisp_gpe_get_main() { + return &lisp_gpe_main; +} + extern vlib_node_registration_t lgpe_ip4_lookup_node; extern vlib_node_registration_t lgpe_ip6_lookup_node; extern vlib_node_registration_t lisp_gpe_ip4_input_node; @@ -174,8 +205,19 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args); typedef struct { u8 is_add; - u32 table_id; /* vrf */ - u32 vni; /* host byte order 
*/ + union + { + /* vrf */ + u32 table_id; + + /* bridge domain */ + u16 bd_id; + + /* generic access */ + u32 dp_table; + }; + u8 is_l2; + u32 vni; /* host byte order */ } vnet_lisp_gpe_add_del_iface_args_t; u8 @@ -192,29 +234,21 @@ typedef struct clib_error_t * vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t *a); -typedef enum -{ - NO_ACTION, - FORWARD_NATIVE, - SEND_MAP_REQUEST, - DROP -} negative_fwd_actions_e; - typedef struct { u8 is_add; /* type of mapping */ u8 is_negative; - negative_fwd_actions_e action; + u8 action; /* local and remote eids */ - gid_address_t seid; /* TODO convert to ip4, ip6, mac ? */ - gid_address_t deid; + gid_address_t lcl_eid; + gid_address_t rmt_eid; /* local and remote locators (underlay attachment points) */ - ip_address_t slocator; - ip_address_t dlocator; + ip_address_t lcl_loc; + ip_address_t rmt_loc; /* FIB indices to lookup remote locator at encap and inner IP at decap */ u32 encap_fib_index; @@ -225,8 +259,12 @@ typedef struct /* VNI/tenant id in HOST byte order */ u32 vni; - /* vrf where fwd entry should be inserted */ - u32 table_id; + /* vrf or bd where fwd entry should be inserted */ + union + { + u32 table_id; + u16 bd_id; + }; } vnet_lisp_gpe_add_del_fwd_entry_args_t; int @@ -267,4 +305,11 @@ typedef enum lgpe_ip6_lookup_next u8 * format_vnet_lisp_gpe_status (u8 * s, va_list * args); +#define L2_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define L2_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20) + +u32 +lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[8], + u8 dst_mac[8]); + #endif /* included_vnet_lisp_gpe_h */ diff --git a/vpp-api-test/vat/api_format.c b/vpp-api-test/vat/api_format.c index 04f8799917c..90c69a2cd66 100644 --- a/vpp-api-test/vat/api_format.c +++ b/vpp-api-test/vat/api_format.c @@ -11198,8 +11198,8 @@ api_lisp_eid_table_add_del_map (vat_main_t * vam) f64 timeout = ~0; unformat_input_t * input = vam->input; vl_api_lisp_eid_table_add_del_map_t *mp; - u8 is_add = 1, vni_set = 0, 
vrf_set = 0; - u32 vni, vrf; + u8 is_add = 1, vni_set = 0, vrf_set = 0, bd_index_set = 0; + u32 vni, vrf, bd_index; /* Parse args required to build the message */ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -11208,13 +11208,15 @@ api_lisp_eid_table_add_del_map (vat_main_t * vam) is_add = 0; else if (unformat(input, "vrf %d", &vrf)) vrf_set = 1; + else if (unformat(input, "bd_index %d", &bd_index)) + bd_index_set = 1; else if (unformat(input, "vni %d", &vni)) vni_set = 1; else break; } - if (!vni_set || !vrf_set) + if (!vni_set || (!vrf_set && !bd_index_set)) { errmsg ("missing arguments!"); return -99; @@ -11224,7 +11226,8 @@ api_lisp_eid_table_add_del_map (vat_main_t * vam) mp->is_add = is_add; mp->vni = htonl (vni); - mp->vrf = htonl (vrf); + /* dp_table carries the bridge domain when is_l2 is set, the vrf otherwise */ + mp->dp_table = htonl (bd_index_set ? bd_index : vrf); + mp->is_l2 = bd_index_set; /* send */ S; diff --git a/vpp/vpp-api/api.c b/vpp/vpp-api/api.c index 6796b57584f..dcaedac3f86 100644 --- a/vpp/vpp-api/api.c +++ b/vpp/vpp-api/api.c @@ -4949,7 +4949,8 @@ vl_api_lisp_eid_table_add_del_map_t_handler( vl_api_lisp_eid_table_add_del_map_reply_t *rmp; int rv = 0; rv = vnet_lisp_eid_table_map (clib_net_to_host_u32 (mp->vni), - clib_net_to_host_u32 (mp->vrf), mp->is_add); + clib_net_to_host_u32 (mp->dp_table), + mp->is_l2, mp->is_add); REPLY_MACRO(VL_API_LISP_EID_TABLE_ADD_DEL_MAP_REPLY) } @@ -5010,9 +5011,9 @@ vl_api_lisp_gpe_add_del_fwd_entry_t_handler( memset (&a, 0, sizeof(a)); a.is_add = mp->is_add; - a.deid = eid; - a.slocator = slocator; - a.dlocator = dlocator; + a.rmt_eid = eid; + a.lcl_loc = slocator; + a.rmt_loc = dlocator; rv = vnet_lisp_gpe_add_del_fwd_entry (&a, 0); REPLY_MACRO(VL_API_LISP_GPE_ADD_DEL_FWD_ENTRY_REPLY); diff --git a/vpp/vpp-api/vpe.api b/vpp/vpp-api/vpe.api index 2019f3ae972..ebadda8525a 100644 --- a/vpp/vpp-api/vpe.api +++ b/vpp/vpp-api/vpe.api @@ -2495,11 +2495,11 @@ define lisp_add_del_map_request_itr_rlocs_reply { i32 retval; }; -/** \brief map/unmap vni to vrf +/** \brief map/unmap vni to vrf/bd_index 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add or delete mapping - @param vni - virtual network id + @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ define lisp_eid_table_add_del_map { @@ -2507,7 +2507,8 @@ define lisp_eid_table_add_del_map { u32 context; u8 is_add; u32 vni; - u32 vrf; + u32 dp_table; + u8 is_l2; }; /** \brief Reply for lisp_eid_table_add_del_map -- cgit 1.2.3-korg