From 02655bd425cc596960b7efc74d1fbad3b3406419 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 26 Apr 2016 00:17:24 +0200 Subject: Add lisp-gpe ip6 data-plane support The implementation mimics that of the ip4 data-plane. Therefore, a new lgpe-ip6-lookup lookup node is introduced for ip6 source lookups, a lisp-gpe-ip6-input node for decapsulating ip6 encapsulated packets and the tx function of the lisp-gpe interface is updated to support any mix of v4 and v6 in underlay and overlay. Change-Id: Ib3a6e339b8cd7618a940acf0dd8e61c042fd83dd Signed-off-by: Florin Coras --- vnet/Makefile.am | 1 + vnet/vnet/ip/udp.h | 181 ++++--- vnet/vnet/lisp-cp/control.c | 13 +- vnet/vnet/lisp-cp/lisp_types.c | 12 + vnet/vnet/lisp-cp/lisp_types.h | 13 +- vnet/vnet/lisp-cp/packets.h | 2 +- vnet/vnet/lisp-gpe/decap.c | 146 +++-- vnet/vnet/lisp-gpe/interface.c | 186 +++++-- vnet/vnet/lisp-gpe/ip_forward.c | 1115 +++++++++++++++++++++++++++++++++++++++ vnet/vnet/lisp-gpe/lisp_gpe.c | 666 +++-------------------- vnet/vnet/lisp-gpe/lisp_gpe.h | 99 +++- vpp/api/api.c | 10 +- 12 files changed, 1682 insertions(+), 762 deletions(-) create mode 100644 vnet/vnet/lisp-gpe/ip_forward.c diff --git a/vnet/Makefile.am b/vnet/Makefile.am index 5fdffcaa..d8778eb6 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -478,6 +478,7 @@ endif libvnet_la_SOURCES += \ vnet/lisp-gpe/lisp_gpe.c \ vnet/lisp-gpe/interface.c \ + vnet/lisp-gpe/ip_forward.c \ vnet/lisp-gpe/decap.c nobase_include_HEADERS += \ diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h index 26576a45..151f2a8a 100644 --- a/vnet/vnet/ip/udp.h +++ b/vnet/vnet/ip/udp.h @@ -49,6 +49,7 @@ _ (6633, vpath_3) #define foreach_udp6_dst_port \ _ (547, dhcpv6_to_server) \ _ (546, dhcpv6_to_client) \ +_ (4341, lisp_gpe6) \ _ (4342, lisp_cp6) \ _ (6633, vpath6_3) @@ -113,45 +114,63 @@ void udp_register_dst_port (vlib_main_t * vm, u32 node_index, u8 is_ip4); always_inline void -ip4_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word 
ec_len) +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) { - ip4_header_t * ip0; - ip_csum_t sum0; - u16 old_l0 = 0; u16 new_l0; udp_header_t * udp0; vlib_buffer_advance (b0, - ec_len); - ip0 = vlib_buffer_get_current(b0); - /* Apply the encap string. */ - clib_memcpy(ip0, ec0, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update(sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *)(ip0+1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - - udp0->length = new_l0; + if (is_ip4) + { + ip4_header_t * ip0; + ip_csum_t sum0; + u16 old_l0 = 0; + + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. */ + clib_memcpy(ip0, ec0, ec_len); + + /* fix the ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update(sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp0->length = new_l0; + } + else + { + ip6_header_t * ip0; + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. 
*/ + clib_memcpy(ip0, ec0, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + ip0->payload_length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + udp0->length = new_l0; + } } always_inline void -ip4_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len) +ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) { - ip4_header_t * ip0, *ip1; - ip_csum_t sum0, sum1; - u16 old_l0 = 0, old_l1 = 0; u16 new_l0, new_l1; udp_header_t * udp0, *udp1; @@ -160,43 +179,73 @@ ip4_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, vlib_buffer_advance (b0, -ec_len); vlib_buffer_advance (b1, -ec_len); - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update(sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - sum1 = ip_csum_update(sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */); - - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - - ip0->length = new_l0; - ip1->length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - new_l0 = clib_host_to_net_u16 ( - vlib_buffer_length_in_chain (vm, b0) - sizeof(*ip0)); - new_l1 = clib_host_to_net_u16 ( - vlib_buffer_length_in_chain (vm, b1) - sizeof(*ip1)); - udp0->length = new_l0; - udp1->length = new_l1; - return; + if (is_v4) + { + ip4_header_t * ip0, *ip1; + ip_csum_t sum0, sum1; + u16 
old_l0 = 0, old_l1 = 0; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + /* fix the ing outer-IP checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update(sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + sum1 = ip_csum_update(sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ip0->length = new_l0; + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + new_l0 = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b0) - sizeof(*ip0)); + new_l1 = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b1) - sizeof(*ip1)); + udp0->length = new_l0; + udp1->length = new_l1; + } + else + { + ip6_header_t * ip0, * ip1; + ip0 = vlib_buffer_get_current(b0); + ip1 = vlib_buffer_get_current(b1); + + /* Apply the encap string. 
*/ + clib_memcpy(ip0, ec0, ec_len); + clib_memcpy(ip1, ec1, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + ip0->payload_length = new_l0; + ip1->payload_length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *)(ip0+1); + udp1 = (udp_header_t *)(ip1+1); + + udp0->length = new_l0; + udp1->length = new_l1; + } } #endif /* included_udp_h */ diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c index a1d21be7..3eabf368 100644 --- a/vnet/vnet/lisp-cp/control.c +++ b/vnet/vnet/lisp-cp/control.c @@ -151,6 +151,8 @@ vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a, ai->vni = 0; /* default for now, pass vni as parameter */ ai->table_id = table_id[0]; vnet_lisp_gpe_add_del_iface (ai, 0); + + hash_set(lcm->dp_if_refcount_by_vni, 0 /* vni (same default as ai->vni above) */, 1); } else { @@ -873,7 +875,7 @@ format_lisp_cp_lookup_trace (u8 * s, va_list * args) lisp_cp_lookup_trace_t * t = va_arg (*args, lisp_cp_lookup_trace_t *); s = format (s, "LISP-CP-LOOKUP: map-resolver: %U destination eid %U", - format_ip4_address, &t->map_resolver_ip, format_gid_address, + format_ip_address, &t->map_resolver_ip, format_gid_address, &t->dst_eid); return s; } @@ -896,10 +898,13 @@ get_local_iface_ip_for_dst (lisp_cp_main_t *lcm, ip_address_t * dst, u32 adj_index; ip_adjacency_t * adj; ip_interface_address_t * ia = 0; - ip_lookup_main_t * lm = &lcm->im4->lookup_main; + ip_lookup_main_t * lm; ip4_address_t * l4 = 0; ip6_address_t * l6 = 0; + lm = ip_addr_version (dst) == IP4 ? 
+ &lcm->im4->lookup_main : &lcm->im6->lookup_main; + adj_index = ip_fib_lookup_with_table (lcm, 0, dst); adj = ip_get_adjacency (lm, adj_index); @@ -1331,6 +1336,7 @@ add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) u32 i, minp = ~0; locator_t * dl = 0; uword * feip = 0, * tidp; + fwd_entry_t* fe; vnet_lisp_gpe_add_del_fwd_entry_args_t _a, * a = &_a; memset (a, 0, sizeof(*a)); @@ -1407,15 +1413,14 @@ add_fwd_entry (lisp_cp_main_t* lcm, u32 src_map_index, u32 dst_map_index) } a->table_id = tidp[0]; + /* TODO remove */ u8 ipver = ip_prefix_version(&gid_address_ippref(&a->deid)); a->decap_next_index = (ipver == IP4) ? LISP_GPE_INPUT_NEXT_IP4_INPUT : LISP_GPE_INPUT_NEXT_IP6_INPUT; - /* XXX tunnels work only with IP4 now */ vnet_lisp_gpe_add_del_fwd_entry (a, &sw_if_index); /* add tunnel to fwd entry table XXX check return value from DP insertion */ - fwd_entry_t* fe; pool_get (lcm->fwd_entry_pool, fe); fe->dst_loc = a->dlocator; fe->src_loc = a->slocator; diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c index 6c9cb7d2..1234c7ca 100644 --- a/vnet/vnet/lisp-cp/lisp_types.c +++ b/vnet/vnet/lisp-cp/lisp_types.c @@ -266,6 +266,18 @@ ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2) return res; } +void +ip_address_copy (ip_address_t * dst , ip_address_t * src) +{ + clib_memcpy (dst, src, sizeof (ip_address_t)); +} + +void +ip_address_copy_addr (void * dst , ip_address_t * src) +{ + clib_memcpy (dst, src, ip_address_size(src)); +} + void * ip_prefix_cast (gid_address_t * a) { diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h index 0e8b38ec..0aba9fb1 100644 --- a/vnet/vnet/lisp-cp/lisp_types.h +++ b/vnet/vnet/lisp-cp/lisp_types.h @@ -25,23 +25,25 @@ typedef enum IP6 } ip_address_type_t; -typedef struct +typedef CLIB_PACKED(struct ip_address { union { ip4_address_t v4; ip6_address_t v6; } ip; - ip_address_type_t version; -} ip_address_t; + u8 version; +}) ip_address_t; int ip_address_cmp 
(ip_address_t * ip1, ip_address_t * ip2); +void ip_address_copy (ip_address_t * dst , ip_address_t * src); +void ip_address_copy_addr (void * dst , ip_address_t * src); -typedef struct +typedef CLIB_PACKED(struct ip_prefix { ip_address_t addr; u8 len; -} ip_prefix_t; +}) ip_prefix_t; #define ip_addr_addr(_a) (_a)->ip #define ip_addr_v4(_a) (_a)->ip.v4 @@ -113,7 +115,6 @@ u32 gid_address_parse (u8 * offset, gid_address_t *a); #define gid_address_ip(_a) ip_prefix_addr(&gid_address_ippref(_a)) /* 'sub'address functions */ -int ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2); u16 ip_prefix_size_to_write (void * pref); u16 ip_prefix_write (u8 * p, void * pref); u8 ip_prefix_length (void *a); diff --git a/vnet/vnet/lisp-cp/packets.h b/vnet/vnet/lisp-cp/packets.h index a9f9a109..d8966522 100644 --- a/vnet/vnet/lisp-cp/packets.h +++ b/vnet/vnet/lisp-cp/packets.h @@ -48,7 +48,7 @@ always_inline void * vlib_buffer_push_uninit (vlib_buffer_t *b, u8 size) { /* XXX should make sure there's enough space! 
*/ - ASSERT (b->current_data > size); + ASSERT (b->current_data >= size); b->current_data -= size; b->current_length += size; diff --git a/vnet/vnet/lisp-gpe/decap.c b/vnet/vnet/lisp-gpe/decap.c index d0848ab5..807d4dad 100644 --- a/vnet/vnet/lisp-gpe/decap.c +++ b/vnet/vnet/lisp-gpe/decap.c @@ -48,7 +48,7 @@ format_lisp_gpe_rx_trace (u8 * s, va_list * args) } static u32 -next_proto_to_next_index[LISP_GPE_NEXT_PROTOS] ={ +next_proto_to_next_index[LISP_GPE_NEXT_PROTOS] = { LISP_GPE_INPUT_NEXT_DROP, LISP_GPE_INPUT_NEXT_IP4_INPUT, LISP_GPE_INPUT_NEXT_IP6_INPUT, @@ -78,8 +78,8 @@ next_protocol_to_next_index (lisp_gpe_header_t * lgh, u8 * next_header) } static uword -lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_v4) { u32 n_left_from, next_index, * from, * to_next; lisp_gpe_tunnel_key_t last_key; @@ -103,12 +103,12 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 bi0, bi1; vlib_buffer_t * b0, * b1; - ip4_udp_lisp_gpe_header_t * iul0, * iul1; - u32 next0, next1, error0 = 0, error1 = 0; + ip4_udp_lisp_gpe_header_t * iul4_0, * iul4_1; + ip6_udp_lisp_gpe_header_t * iul6_0, * iul6_1; + lisp_gpe_header_t * lh0, * lh1; + u32 next0, next1, error0, error1; uword * si0, * si1; - next0 = next1 = LISP_GPE_INPUT_NEXT_IP4_INPUT; - /* Prefetch next iteration. 
*/ { vlib_buffer_t * p2, * p3; @@ -136,20 +136,45 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, b1 = vlib_get_buffer (vm, bi1); /* udp leaves current_data pointing at the lisp header */ - vlib_buffer_advance (b0, - IP_UDP_HDR_LEN); - vlib_buffer_advance (b1, - IP_UDP_HDR_LEN); + if (is_v4) + { + vlib_buffer_advance ( + b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t))); + vlib_buffer_advance ( + b1, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t))); + + iul4_0 = vlib_buffer_get_current (b0); + iul4_1 = vlib_buffer_get_current (b1); - iul0 = vlib_buffer_get_current (b0); - iul1 = vlib_buffer_get_current (b1); + /* pop (ip, udp, lisp-gpe) */ + vlib_buffer_advance (b0, sizeof(*iul4_0)); + vlib_buffer_advance (b1, sizeof(*iul4_1)); - /* pop (ip, udp, lisp-gpe) */ - vlib_buffer_advance (b0, sizeof (*iul0)); - vlib_buffer_advance (b1, sizeof (*iul1)); + lh0 = &iul4_0->lisp; + lh1 = &iul4_1->lisp; + } + else + { + vlib_buffer_advance ( + b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t))); + vlib_buffer_advance ( + b1, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t))); + + iul6_0 = vlib_buffer_get_current (b0); + iul6_1 = vlib_buffer_get_current (b1); + + /* pop (ip, udp, lisp-gpe) */ + vlib_buffer_advance (b0, sizeof(*iul6_0)); + vlib_buffer_advance (b1, sizeof(*iul6_1)); + + lh0 = &iul6_0->lisp; + lh1 = &iul6_1->lisp; + } /* determine next_index from lisp-gpe header */ - next0 = next_protocol_to_next_index (&iul0->lisp, + next0 = next_protocol_to_next_index (lh0, vlib_buffer_get_current (b0)); - next1 = next_protocol_to_next_index (&iul1->lisp, + next1 = next_protocol_to_next_index (lh1, vlib_buffer_get_current (b1)); /* Required to make the l2 tag push / pop code work on l2 subifs */ @@ -158,8 +183,8 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to * decide the rx vrf and the input features to be applied */ - si0 = 
hash_get(lgm->tunnel_term_sw_if_index_by_vni, iul0->lisp.iid); - si1 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, iul1->lisp.iid); + si0 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, lh0->iid); + si1 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, lh1->iid); if (si0) { @@ -194,7 +219,7 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, sizeof(*tr)); tr->next_index = next0; tr->error = error0; - tr->h = iul0->lisp; + tr->h = lh0[0]; } if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) @@ -203,7 +228,7 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, sizeof(*tr)); tr->next_index = next1; tr->error = error1; - tr->h = iul1->lisp; + tr->h = lh1[0]; } vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, @@ -216,7 +241,9 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi0; vlib_buffer_t * b0; u32 next0; - ip4_udp_lisp_gpe_header_t * iul0; + ip4_udp_lisp_gpe_header_t * iul4_0; + ip6_udp_lisp_gpe_header_t * iul6_0; + lisp_gpe_header_t * lh0; u32 error0; uword * si0; @@ -230,12 +257,30 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); /* udp leaves current_data pointing at the lisp header */ - vlib_buffer_advance (b0, - IP_UDP_HDR_LEN); + if (is_v4) + { + vlib_buffer_advance ( + b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t))); - iul0 = vlib_buffer_get_current (b0); + iul4_0 = vlib_buffer_get_current (b0); - /* pop (ip, udp, lisp-gpe) */ - vlib_buffer_advance (b0, sizeof (*iul0)); + /* pop (ip, udp, lisp-gpe) */ + vlib_buffer_advance (b0, sizeof(*iul4_0)); + + lh0 = &iul4_0->lisp; + } + else + { + vlib_buffer_advance ( + b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t))); + + iul6_0 = vlib_buffer_get_current (b0); + + /* pop (ip, udp, lisp-gpe) */ + vlib_buffer_advance (b0, sizeof(*iul6_0)); + + lh0 = &iul6_0->lisp; + } /* TODO if security is to be implemented, something similar to RPF, * probably we'd like to check that the peer is allowed 
to send us @@ -244,7 +289,7 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, * the packet is one of its locators */ /* determine next_index from lisp-gpe header */ - next0 = next_protocol_to_next_index (&iul0->lisp, + next0 = next_protocol_to_next_index (lh0, vlib_buffer_get_current (b0)); /* Required to make the l2 tag push / pop code work on l2 subifs */ @@ -252,7 +297,7 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, /* map iid/vni to lisp-gpe sw_if_index which is used by ipx_input to * decide the rx vrf and the input features to be applied */ - si0 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, iul0->lisp.iid); + si0 = hash_get(lgm->tunnel_term_sw_if_index_by_vni, lh0->iid); if (si0) { @@ -275,7 +320,7 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, sizeof(*tr)); tr->next_index = next0; tr->error = error0; - tr->h = iul0->lisp; + tr->h = lh0[0]; } vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, @@ -284,12 +329,26 @@ lisp_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - vlib_node_increment_counter (vm, lisp_gpe_input_node.index, + vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index, LISP_GPE_ERROR_DECAPSULATED, pkts_decapsulated); return from_frame->n_vectors; } +static uword +lisp_gpe_ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return lisp_gpe_input_inline(vm, node, from_frame, 1); +} + +static uword +lisp_gpe_ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return lisp_gpe_input_inline(vm, node, from_frame, 0); +} + static char * lisp_gpe_error_strings[] = { #define lisp_gpe_error(n,s) s, #include @@ -297,9 +356,30 @@ static char * lisp_gpe_error_strings[] = { #undef _ }; -VLIB_REGISTER_NODE (lisp_gpe_input_node) = { - .function = lisp_gpe_input, - .name = "lisp-gpe-input", +VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = { + 
.function = lisp_gpe_ip4_input, + .name = "lisp-gpe-ip4-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = LISP_GPE_N_ERROR, + .error_strings = lisp_gpe_error_strings, + + .n_next_nodes = LISP_GPE_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [LISP_GPE_INPUT_NEXT_##s] = n, + foreach_lisp_gpe_ip_input_next +#undef _ + }, + + .format_buffer = format_lisp_gpe_header_with_length, + .format_trace = format_lisp_gpe_rx_trace, + // $$$$ .unformat_buffer = unformat_lisp_gpe_header, +}; + +VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = { + .function = lisp_gpe_ip6_input, + .name = "lisp-gpe-ip6-input", /* Takes a vector of packets. */ .vector_size = sizeof (u32), @@ -309,7 +389,7 @@ VLIB_REGISTER_NODE (lisp_gpe_input_node) = { .n_next_nodes = LISP_GPE_INPUT_N_NEXT, .next_nodes = { #define _(s,n) [LISP_GPE_INPUT_NEXT_##s] = n, - foreach_lisp_gpe_input_next + foreach_lisp_gpe_ip_input_next #undef _ }, diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c index 0f7f6fca..58a9072b 100644 --- a/vnet/vnet/lisp-gpe/interface.c +++ b/vnet/vnet/lisp-gpe/interface.c @@ -23,7 +23,8 @@ #define foreach_lisp_gpe_tx_next \ _(DROP, "error-drop") \ - _(IP4_LOOKUP, "ip4-lookup") + _(IP4_LOOKUP, "ip4-lookup") \ + _(IP6_LOOKUP, "ip6-lookup") typedef enum { @@ -49,6 +50,104 @@ format_lisp_gpe_tx_trace (u8 * s, va_list * args) return s; } +always_inline void +get_one_tunnel_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, + lisp_gpe_tunnel_t ** t0, u8 is_v4) +{ + u32 adj_index0, tunnel_index0; + ip_adjacency_t * adj0; + + /* Get adjacency and from it the tunnel_index */ + adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + + if (is_v4) + adj0 = ip_get_adjacency (lgm->lm4, adj_index0); + else + adj0 = ip_get_adjacency (lgm->lm6, adj_index0); + + tunnel_index0 = adj0->rewrite_header.node_index; + t0[0] = pool_elt_at_index(lgm->tunnels, tunnel_index0); + + ASSERT(t0[0] != 0); +} + +always_inline void +encap_one_inline 
(lisp_gpe_main_t * lgm, vlib_buffer_t * b0, + lisp_gpe_tunnel_t * t0, u32 * next0, u8 is_v4) +{ + ASSERT(sizeof(ip4_udp_lisp_gpe_header_t) == 36); + ASSERT(sizeof(ip6_udp_lisp_gpe_header_t) == 56); + + if (is_v4) + { + ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 36, 1); + next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; + + } + else + { + ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 56, 0); + next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; + } +} + +always_inline void +get_two_tunnels_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, + vlib_buffer_t * b1, lisp_gpe_tunnel_t ** t0, + lisp_gpe_tunnel_t ** t1, u8 is_v4) +{ + u32 adj_index0, adj_index1, tunnel_index0, tunnel_index1; + ip_adjacency_t * adj0, * adj1; + + /* Get adjacency and from it the tunnel_index */ + adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + + if (is_v4) + { + adj0 = ip_get_adjacency (lgm->lm4, adj_index0); + adj1 = ip_get_adjacency (lgm->lm4, adj_index1); + } + else + { + adj0 = ip_get_adjacency (lgm->lm6, adj_index0); + adj1 = ip_get_adjacency (lgm->lm6, adj_index1); + } + + tunnel_index0 = adj0->rewrite_header.node_index; + tunnel_index1 = adj1->rewrite_header.node_index; + + t0[0] = pool_elt_at_index(lgm->tunnels, tunnel_index0); + t1[0] = pool_elt_at_index(lgm->tunnels, tunnel_index1); + + ASSERT(t0[0] != 0); + ASSERT(t1[0] != 0); +} + +always_inline void +encap_two_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, + lisp_gpe_tunnel_t * t0, lisp_gpe_tunnel_t * t1, u32 * next0, + u32 * next1, u8 is_v4) +{ + ASSERT(sizeof(ip4_udp_lisp_gpe_header_t) == 36); + ASSERT(sizeof(ip6_udp_lisp_gpe_header_t) == 56); + + if (is_v4) + { + ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 36, 1); + ip_udp_encap_one (lgm->vlib_main, b1, t1->rewrite, 36, 1); + next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; + } + else + { + ip_udp_encap_one (lgm->vlib_main, b0, t0->rewrite, 56, 0); + ip_udp_encap_one (lgm->vlib_main, b1, 
t1->rewrite, 56, 0); + next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; + } +} + +#define is_v4_packet(_h) (((*(u8*) (_h)) & 0xF0) == 0x40) /* non-zero when the high nibble of the first byte at _h is 4 */ + static uword lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -74,9 +173,8 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi0, bi1; vlib_buffer_t * b0, * b1; u32 next0, next1; - u32 adj_index0, adj_index1, tunnel_index0, tunnel_index1; - ip_adjacency_t * adj0, * adj1; - lisp_gpe_tunnel_t * t0, * t1; + lisp_gpe_tunnel_t * t0 = 0, * t1 = 0; + u8 is_v4_eid0, is_v4_eid1; next0 = next1 = LISP_GPE_TX_NEXT_IP4_LOOKUP; @@ -106,24 +204,33 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); - /* Get adjacency and from it the tunnel_index */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + is_v4_eid0 = is_v4_packet(vlib_buffer_get_current (b0)); + is_v4_eid1 = is_v4_packet(vlib_buffer_get_current (b1)); - adj0 = ip_get_adjacency (lgm->lookup_main, adj_index0); - adj1 = ip_get_adjacency (lgm->lookup_main, adj_index1); - - tunnel_index0 = adj0->rewrite_header.node_index; - tunnel_index1 = adj1->rewrite_header.node_index; - - t0 = pool_elt_at_index (lgm->tunnels, tunnel_index0); - t1 = pool_elt_at_index (lgm->tunnels, tunnel_index1); - - ASSERT(t0 != 0); - ASSERT(t1 != 0); + if (PREDICT_TRUE(is_v4_eid0 == is_v4_eid1)) + { + get_two_tunnels_inline (lgm, b0, b1, &t0, &t1, + is_v4_eid0 ? 1 : 0); + } + else + { + get_one_tunnel_inline (lgm, b0, &t0, is_v4_eid0 ? 1 : 0); + get_one_tunnel_inline (lgm, b1, &t1, is_v4_eid1 ? 1 : 0); + } - ASSERT (sizeof(ip4_udp_lisp_gpe_header_t) == 36); - ip4_udp_encap_two (vm, b0, b1, t0->rewrite, t1->rewrite, 36); + if (PREDICT_TRUE( + ip_addr_version(&t0->dst) == ip_addr_version(&t1->dst))) + { + encap_two_inline (lgm, b0, b1, t0, t1, &next0, &next1, + ip_addr_version(&t0->dst) == IP4 ? 
1 : 0); + } + else + { + encap_one_inline (lgm, b0, t0, &next0, + ip_addr_version(&t0->dst) == IP4 ? 1 : 0); + encap_one_inline (lgm, b1, t1, &next1, + ip_addr_version(&t1->dst) == IP4 ? 1 : 0); + } /* Reset to look up tunnel partner in the configured FIB */ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; @@ -152,10 +259,9 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left_from > 0 && n_left_to_next > 0) { vlib_buffer_t * b0; - u32 bi0, adj_index0, tunnel_index0; - u32 next0 = LISP_GPE_TX_NEXT_IP4_LOOKUP; + u32 bi0, next0 = LISP_GPE_TX_NEXT_IP4_LOOKUP; lisp_gpe_tunnel_t * t0 = 0; - ip_adjacency_t * adj0; + u8 is_v4_0; bi0 = from[0]; to_next[0] = bi0; @@ -166,17 +272,11 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); - /* Get adjacency and from it the tunnel_index */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - adj0 = ip_get_adjacency (lgm->lookup_main, adj_index0); - - tunnel_index0 = adj0->rewrite_header.node_index; - t0 = pool_elt_at_index (lgm->tunnels, tunnel_index0); - - ASSERT(t0 != 0); + is_v4_0 = is_v4_packet(vlib_buffer_get_current (b0)); + get_one_tunnel_inline (lgm, b0, &t0, is_v4_0 ? 1 : 0); - ASSERT (sizeof(ip4_udp_lisp_gpe_header_t) == 36); - ip4_udp_encap_one (vm, b0, t0->rewrite, 36); + encap_one_inline (lgm, b0, t0, &next0, + ip_addr_version(&t0->dst) == IP4 ? 1 : 0); /* Reset to look up tunnel partner in the configured FIB */ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; @@ -335,7 +435,8 @@ add_del_lisp_gpe_default_route (u32 table_id, u8 is_v4, u8 is_add) adj.n_adj = 1; adj.explicit_fib_index = ~0; - adj.lookup_next_index = lgm->ip4_lookup_next_lgpe_ip4_lookup; + adj.lookup_next_index = is_v4 ? 
lgm->ip4_lookup_next_lgpe_ip4_lookup : + lgm->ip6_lookup_next_lgpe_ip6_lookup; /* default route has tunnel_index ~0 */ adj.rewrite_header.sw_if_index = ~0; @@ -385,7 +486,7 @@ vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, lisp_gpe_main_t * lgm = &lisp_gpe_main; vnet_main_t * vnm = lgm->vnet_main; vnet_hw_interface_t * hi; - u32 hw_if_index = ~0, lookup_next_index, flen; + u32 hw_if_index = ~0, lookup_next_index4, lookup_next_index6, flen; uword * hip, * vni; hip = hash_get(lgm->lisp_gpe_hw_if_index_by_table_id, a->table_id); @@ -421,12 +522,17 @@ vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, hash_set(lgm->tunnel_term_sw_if_index_by_vni, a->vni, hi->sw_if_index); hash_set(lgm->vni_by_tunnel_term_sw_if_index, hi->sw_if_index, a->vni); - /* set ingress arc from lgpe_ip4_lookup */ - lookup_next_index = vlib_node_add_next (lgm->vlib_main, - lgpe_ip4_lookup_node.index, - hi->output_node_index); + /* set ingress arc from lgpe_ipX_lookup */ + lookup_next_index4 = vlib_node_add_next (lgm->vlib_main, + lgpe_ip4_lookup_node.index, + hi->output_node_index); + lookup_next_index6 = vlib_node_add_next (lgm->vlib_main, + lgpe_ip6_lookup_node.index, + hi->output_node_index); hash_set(lgm->lgpe_ip4_lookup_next_index_by_table_id, a->table_id, - lookup_next_index); + lookup_next_index4); + hash_set(lgm->lgpe_ip6_lookup_next_index_by_table_id, a->table_id, + lookup_next_index6); /* insert default routes that point to lgpe-ipx-lookup */ add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */1, 1); diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c new file mode 100644 index 00000000..83c52e34 --- /dev/null +++ b/vnet/vnet/lisp-gpe/ip_forward.c @@ -0,0 +1,1115 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +/* avoids calling route callbacks for src fib */ +static void +ip4_sd_fib_set_adj_index (lisp_gpe_main_t * lgm, ip4_fib_t * fib, u32 flags, + u32 dst_address_u32, u32 dst_address_length, + u32 adj_index) +{ + ip_lookup_main_t * lm = lgm->lm4; + uword * hash; + + if (vec_bytes(fib->old_hash_values)) + memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); + if (vec_bytes(fib->new_hash_values)) + memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); + fib->new_hash_values[0] = adj_index; + + /* Make sure adj index is valid. 
*/ + if (CLIB_DEBUG > 0) + (void) ip_get_adjacency (lm, adj_index); + + hash = fib->adj_index_by_dst_address[dst_address_length]; + + hash = _hash_set3 (hash, dst_address_u32, + fib->new_hash_values, + fib->old_hash_values); + + fib->adj_index_by_dst_address[dst_address_length] = hash; +} + +/* copied from ip4_forward since it's static */ +static void +ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, + ip4_fib_t * fib, + u32 address_length) +{ + hash_t * h; + uword max_index; + + ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); + lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof(uword)) + / sizeof(uword); + + fib->adj_index_by_dst_address[address_length] = + hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword)); + + hash_set_flags (fib->adj_index_by_dst_address[address_length], + HASH_FLAG_NO_AUTO_SHRINK); + + h = hash_header (fib->adj_index_by_dst_address[address_length]); + max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; + + /* Initialize new/old hash value vectors. */ + vec_validate_init_empty (fib->new_hash_values, max_index, ~0); + vec_validate_init_empty (fib->old_hash_values, max_index, ~0); +} + +static void +ip4_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm, + ip4_add_del_route_args_t * a) +{ + ip_lookup_main_t * lm = lgm->lm4; + ip4_fib_t * fib; + u32 dst_address, dst_address_length, adj_index, old_adj_index; + uword * hash, is_del; + + /* Either create new adjacency or use given one depending on arguments. */ + if (a->n_add_adj > 0) + ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); + else + adj_index = a->adj_index; + + dst_address = a->dst_address.data_u32; + dst_address_length = a->dst_address_length; + + fib = pool_elt_at_index(lgm->ip4_src_fibs, a->table_index_or_table_id); + + if (! 
fib->adj_index_by_dst_address[dst_address_length]) + ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); + + hash = fib->adj_index_by_dst_address[dst_address_length]; + + is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; + + if (is_del) + { + fib->old_hash_values[0] = ~0; + hash = _hash_unset (hash, dst_address, fib->old_hash_values); + fib->adj_index_by_dst_address[dst_address_length] = hash; + } + else + ip4_sd_fib_set_adj_index (lgm, fib, a->flags, dst_address, + dst_address_length, adj_index); + + old_adj_index = fib->old_hash_values[0]; + + ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, + is_del ? old_adj_index : adj_index, + is_del); + + /* Delete old adjacency index if present and changed. */ + if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) + && old_adj_index != ~0 + && old_adj_index != adj_index) + ip_del_adjacency (lm, old_adj_index); +} + +static void * +ip4_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index, + ip4_address_t * src, u32 address_length) +{ + ip4_fib_t * fib = pool_elt_at_index (lgm->ip4_src_fibs, src_fib_index); + uword * hash, * p; + + hash = fib->adj_index_by_dst_address[address_length]; + p = hash_get (hash, src->as_u32); + return (void *) p; +} + +typedef CLIB_PACKED (struct ip4_route { + ip4_address_t address; + u32 address_length : 6; + u32 index : 26; +}) ip4_route_t; + +void +ip4_sd_fib_clear_src_fib (lisp_gpe_main_t * lgm, ip4_fib_t * fib) +{ + ip4_route_t * routes = 0, * r; + u32 i; + + vec_reset_length (routes); + + for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) { + uword * hash = fib->adj_index_by_dst_address[i]; + hash_pair_t * p; + ip4_route_t x; + + x.address_length = i; + + hash_foreach_pair (p, hash, + ({ + x.address.data_u32 = p->key; + vec_add1 (routes, x); + })); + } + + vec_foreach (r, routes) { + ip4_add_del_route_args_t a; + + memset (&a, 0, sizeof (a)); + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = fib - 
lgm->ip4_src_fibs; + a.dst_address = r->address; + a.dst_address_length = r->address_length; + a.adj_index = ~0; + + ip4_sd_fib_add_del_src_route (lgm, &a); + } +} + +static int +ip4_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id, + ip_adjacency_t * add_adj, u8 is_add) +{ + uword * p; + ip4_add_del_route_args_t a; + ip_adjacency_t * dst_adjp, dst_adj; + ip4_address_t dst = ip_prefix_v4(dst_prefix), src; + u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; + ip4_fib_t * src_fib; + + if (src_prefix) + { + src = ip_prefix_v4(src_prefix); + src_address_length = ip_prefix_len(src_prefix); + } + else + memset(&src, 0, sizeof(src)); + + /* lookup dst adj */ + p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); + + if (is_add) + { + /* insert dst prefix to ip4 fib, if it's not in yet */ + if (p == 0) + { + /* dst adj should point to lisp gpe lookup */ + dst_adj = add_adj[0]; + dst_adj.lookup_next_index = lgm->ip4_lookup_next_lgpe_ip4_lookup; + + memset(&a, 0, sizeof(a)); + a.flags = IP4_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; /* vrf */ + a.adj_index = ~0; + a.dst_address_length = dst_address_length; + a.dst_address = dst; + a.flags |= IP4_ROUTE_FLAG_ADD; + a.add_adj = &dst_adj; + a.n_add_adj = 1; + + ip4_add_del_route (lgm->im4, &a); + + /* lookup dst adj to obtain the adj index */ + p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, + dst_address_length); + if (p == 0) + { + clib_warning("Failed to insert dst route for eid %U!", + format_ip4_address_and_length, dst.as_u8, + dst_address_length); + return -1; + } + + /* allocate and init src ip4 fib */ + pool_get(lgm->ip4_src_fibs, src_fib); + ip4_mtrie_init (&src_fib->mtrie); + + /* reuse rewrite header to store pointer to src fib */ + dst_adjp = ip_get_adjacency (lgm->lm4, p[0]); + dst_adjp->rewrite_header.sw_if_index = src_fib - lgm->ip4_src_fibs; + } + } + else + { + if (p == 0) + { + 
clib_warning("Trying to delete inexistent dst route for %U. Aborting", + format_ip4_address_and_length, dst.as_u8, + dst_address_length); + return -1; + } + } + + dst_adjp = ip_get_adjacency (lgm->lm4, p[0]); + + /* add/del src prefix to src fib */ + memset(&a, 0, sizeof(a)); + a.flags = IP4_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index; + a.adj_index = ~0; + a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL; + a.add_adj = add_adj; + a.n_add_adj = 1; + /* if src prefix is null, add 0/0 */ + a.dst_address_length = src_address_length; + a.dst_address = src; + ip4_sd_fib_add_del_src_route (lgm, &a); + + /* if a delete, check if there are elements left in the src fib */ + if (!is_add) + { + src_fib = pool_elt_at_index(lgm->ip4_src_fibs, + dst_adjp->rewrite_header.sw_if_index); + if (!src_fib) + return 0; + + /* if there's nothing left */ + if (ARRAY_LEN(src_fib->adj_index_by_dst_address) == 0) + { + /* remove the src fib .. */ + pool_put(lgm->ip4_src_fibs, src_fib); + + /* .. 
and remove dst route */ + memset(&a, 0, sizeof(a)); + a.flags = IP4_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; /* vrf */ + a.adj_index = ~0; + a.dst_address_length = dst_address_length; + a.dst_address = dst; + a.flags |= IP4_ROUTE_FLAG_DEL; + + ip4_add_del_route (lgm->im4, &a); + } + } + + return 0; +} + +static void * +ip4_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id) +{ + uword * p; + ip4_address_t dst = ip_prefix_v4(dst_prefix), src; + u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; + ip_adjacency_t * dst_adj; + + if (src_prefix) + { + src = ip_prefix_v4(src_prefix); + src_address_length = ip_prefix_len(src_prefix); + } + else + memset(&src, 0, sizeof(src)); + + /* lookup dst adj */ + p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); + if (p == 0) + return p; + + dst_adj = ip_get_adjacency (lgm->lm4, p[0]); + return ip4_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src, + src_address_length); +} + +static u32 +ip6_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index, + ip6_address_t * src, u32 address_length) +{ + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + ip6_src_fib_t * fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index); + + len = vec_len (fib->prefix_lengths_in_search_order); + + for (i = 0; i < len; i++) + { + int dst_address_length = fib->prefix_lengths_in_search_order[i]; + ip6_address_t * mask; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + + mask = &fib->fib_masks[dst_address_length]; + + kv.key[0] = src->as_u64[0] & mask->as_u64[0]; + kv.key[1] = src->as_u64[1] & mask->as_u64[1]; + kv.key[2] = dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&fib->ip6_lookup_table, &kv, &value); + if (rv == 0) + return value.value; + } + + return 0; +} + +static void +compute_prefix_lengths_in_search_order (ip6_src_fib_t * fib) +{ + int i; + 
vec_reset_length(fib->prefix_lengths_in_search_order); + /* Note: bitmap reversed so this is in fact a longest prefix match */ + clib_bitmap_foreach(i, fib->non_empty_dst_address_length_bitmap, ({ + int dst_address_length = 128 - i; + vec_add1 (fib->prefix_lengths_in_search_order, dst_address_length); + })); +} + +/* Rewrite of ip6_add_del_route() because it uses im6 to find the fib */ +static void +ip6_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm, + ip6_add_del_route_args_t * a) +{ + ip_lookup_main_t * lm = lgm->lm6; + ip6_src_fib_t * fib; + ip6_address_t dst_address; + u32 dst_address_length, adj_index; + uword is_del; + u32 old_adj_index = ~0; + BVT(clib_bihash_kv) kv, value; + + vlib_smp_unsafe_warning(); + + is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; + + /* Either create new adjacency or use given one depending on arguments. */ + if (a->n_add_adj > 0) + { + ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); + } + else + adj_index = a->adj_index; + + dst_address = a->dst_address; + dst_address_length = a->dst_address_length; + fib = pool_elt_at_index(lgm->ip6_src_fibs, a->table_index_or_table_id); + + ASSERT (dst_address_length < ARRAY_LEN (fib->fib_masks)); + ip6_address_mask (&dst_address, &fib->fib_masks[dst_address_length]); + + /* refcount accounting */ + if (is_del) + { + ASSERT(fib->dst_address_length_refcounts[dst_address_length] > 0); + if (--fib->dst_address_length_refcounts[dst_address_length] == 0) + { + fib->non_empty_dst_address_length_bitmap = clib_bitmap_set ( + fib->non_empty_dst_address_length_bitmap, + 128 - dst_address_length, 0); + compute_prefix_lengths_in_search_order (fib); + } + } + else + { + fib->dst_address_length_refcounts[dst_address_length]++; + + fib->non_empty_dst_address_length_bitmap = + clib_bitmap_set (fib->non_empty_dst_address_length_bitmap, + 128 - dst_address_length, 1); + compute_prefix_lengths_in_search_order (fib); + } + + kv.key[0] = dst_address.as_u64[0]; + kv.key[1] = dst_address.as_u64[1]; + 
kv.key[2] = dst_address_length; + + if (BV(clib_bihash_search)(&fib->ip6_lookup_table, &kv, &value) == 0) + old_adj_index = value.value; + + if (is_del) + BV(clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 0 /* is_add */); + else + { + /* Make sure adj index is valid. */ + if (CLIB_DEBUG > 0) + (void) ip_get_adjacency (lm, adj_index); + + kv.value = adj_index; + + BV(clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 1 /* is_add */); + } + + /* Avoid spurious reference count increments */ + if (old_adj_index == adj_index + && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) + { + ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + if (adj->share_count > 0) + adj->share_count --; + } + + /* Delete old adjacency index if present and changed. */ + { + if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) + && old_adj_index != ~0 + && old_adj_index != adj_index) + ip_del_adjacency (lm, old_adj_index); + } +} + +static void +ip6_src_fib_init (ip6_src_fib_t * fib) +{ + uword i; + + for (i = 0; i < ARRAY_LEN (fib->fib_masks); i++) + { + u32 j, i0, i1; + + i0 = i / 32; + i1 = i % 32; + + for (j = 0; j < i0; j++) + fib->fib_masks[i].as_u32[j] = ~0; + + if (i1) + fib->fib_masks[i].as_u32[i0] = clib_host_to_net_u32 ( + pow2_mask (i1) << (32 - i1)); + } + + if (fib->lookup_table_nbuckets == 0) + fib->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS; + + fib->lookup_table_nbuckets = 1 << max_log2 (fib->lookup_table_nbuckets); + + if (fib->lookup_table_size == 0) + fib->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; + + BV(clib_bihash_init) (&fib->ip6_lookup_table, "ip6 lookup table", + fib->lookup_table_nbuckets, + fib->lookup_table_size); + +} + +static int +ip6_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id, + ip_adjacency_t * add_adj, u8 is_add) +{ + u32 adj_index; + ip6_add_del_route_args_t a; + ip_adjacency_t * dst_adjp, dst_adj; + ip6_address_t dst = ip_prefix_v6(dst_prefix), src; + 
u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; + ip6_src_fib_t * src_fib; + + if (src_prefix) + { + src = ip_prefix_v6(src_prefix); + src_address_length = ip_prefix_len(src_prefix); + } + else + memset(&src, 0, sizeof(src)); + + /* lookup dst adj and create it if it doesn't exist */ + adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length); + + if (is_add) + { + /* insert dst prefix to ip6 fib, if it's not in yet */ + if (adj_index == 0) + { + /* dst adj should point to lisp gpe ip lookup */ + dst_adj = add_adj[0]; + dst_adj.lookup_next_index = lgm->ip6_lookup_next_lgpe_ip6_lookup; + + memset(&a, 0, sizeof(a)); + a.flags = IP6_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; /* vrf */ + a.adj_index = ~0; + a.dst_address_length = dst_address_length; + a.dst_address = dst; + a.flags |= IP6_ROUTE_FLAG_ADD; + a.add_adj = &dst_adj; + a.n_add_adj = 1; + + ip6_add_del_route (lgm->im6, &a); + + /* lookup dst adj to obtain the adj index */ + adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, + dst_address_length); + + ASSERT(adj_index != 0); + + /* allocate and init src ip6 fib */ + pool_get(lgm->ip6_src_fibs, src_fib); + memset(src_fib, 0, sizeof(src_fib[0])); + ip6_src_fib_init (src_fib); + + /* reuse rewrite header to store pointer to src fib */ + dst_adjp = ip_get_adjacency (lgm->lm6, adj_index); + dst_adjp->rewrite_header.sw_if_index = src_fib - lgm->ip6_src_fibs; + } + } + else + { + if (adj_index == 0) + { + clib_warning("Trying to delete inexistent dst route for %U. Aborting", + format_ip6_address_and_length, dst.as_u8, + dst_address_length); + return -1; + } + } + + dst_adjp = ip_get_adjacency (lgm->lm6, adj_index); + + /* add/del src prefix to src fib */ + memset(&a, 0, sizeof(a)); + a.flags = IP6_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index; + a.adj_index = ~0; + a.flags |= is_add ? 
IP6_ROUTE_FLAG_ADD : IP6_ROUTE_FLAG_DEL; + a.add_adj = add_adj; + a.n_add_adj = 1; + /* if src prefix is null, add ::0 */ + a.dst_address_length = src_address_length; + a.dst_address = src; + ip6_sd_fib_add_del_src_route (lgm, &a); + + /* if a delete, check if there are elements left in the src fib */ + if (!is_add) + { + src_fib = pool_elt_at_index(lgm->ip6_src_fibs, + dst_adjp->rewrite_header.sw_if_index); + if (!src_fib) + return 0; + + /* if there's nothing left */ + if (clib_bitmap_count_set_bits ( + src_fib->non_empty_dst_address_length_bitmap) == 0) + { + /* remove src fib .. */ + pool_put(lgm->ip6_src_fibs, src_fib); + + /* .. and remove dst route */ + memset(&a, 0, sizeof(a)); + a.flags = IP6_ROUTE_FLAG_TABLE_ID; + a.table_index_or_table_id = table_id; /* vrf */ + a.adj_index = ~0; + a.dst_address_length = dst_address_length; + a.dst_address = dst; + a.flags |= IP6_ROUTE_FLAG_DEL; + + ip6_add_del_route (lgm->im6, &a); + } + } + + return 0; +} + +static u32 +ip6_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id) +{ + u32 adj_index; + ip6_address_t dst = ip_prefix_v6(dst_prefix), src; + u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; + ip_adjacency_t * dst_adj; + + if (src_prefix) + { + src = ip_prefix_v6(src_prefix); + src_address_length = ip_prefix_len(src_prefix); + } + else + memset(&src, 0, sizeof(src)); + + /* lookup dst adj */ + adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length); + if (adj_index == 0) + return adj_index; + + dst_adj = ip_get_adjacency (lgm->lm6, adj_index); + return ip6_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src, + src_address_length); +} + +int +ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id, + ip_adjacency_t * add_adj, u8 is_add) +{ + return ( + ip_prefix_version(dst_prefix) == IP4 ? 
+ ip4_sd_fib_add_del_route : ip6_sd_fib_add_del_route) (lgm, dst_prefix, + src_prefix, + table_id, add_adj, + is_add); +} + +u32 +ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id) +{ + if (ip_prefix_version(dst_prefix) == IP4) + { + u32 * adj_index = ip4_sd_fib_get_route (lgm, dst_prefix, src_prefix, + table_id); + return (adj_index == 0) ? 0 : adj_index[0]; + } + else + return ip6_sd_fib_get_route (lgm, dst_prefix, src_prefix, table_id); +} + +always_inline void +ip4_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0, + ip4_address_t * addr0, u32 * src_adj_index0) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0; + + mtrie0 = &vec_elt_at_index(lgm->ip4_src_fibs, src_fib_index0)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); +} + +always_inline void +ip4_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0, + u32 src_fib_index1, ip4_address_t * addr0, + ip4_address_t * addr1, u32 * src_adj_index0, + u32 * src_adj_index1) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + + mtrie0 = &vec_elt_at_index(lgm->ip4_src_fibs, src_fib_index0)->mtrie; + mtrie1 = &vec_elt_at_index(lgm->ip4_src_fibs, src_fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); +} + +always_inline uword +lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + lisp_gpe_main_t * lgm = &lisp_gpe_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip4_header_t * ip0, * ip1; + u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1, + src_adj_index1, src_fib_index1; + ip_adjacency_t * dst_adj0, * src_adj0, * dst_adj1, * src_adj1; + u32 next0, next1; + + next0 = next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* dst lookup was done by ip4 lookup */ + dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + dst_adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + + dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0); + dst_adj1 = ip_get_adjacency (lgm->lm4, dst_adj_index1); + + src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; + src_fib_index1 = dst_adj1->rewrite_header.sw_if_index; + + /* if default route not hit in ip4 lookup */ + if (PREDICT_TRUE(src_fib_index0 != (u32) ~0 + && src_fib_index1 != (u32) ~0)) + { + ip4_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1, + &ip0->src_address, &ip1->src_address, + &src_adj_index0, &src_adj_index1); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; + + src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); + src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1); + + next0 = src_adj0->lookup_next_index; + next1 = src_adj1->lookup_next_index; + + /* prepare buffer for lisp-gpe output node */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + src_adj1->rewrite_header.sw_if_index; + } + else + { + if (src_fib_index0 != (u32) ~0) + { + ip4_src_fib_lookup_one (lgm, src_fib_index0, + &ip0->src_address, &src_adj_index0); + 
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); + next0 = src_adj0->lookup_next_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; + } + if (src_fib_index1 != (u32) ~0) + { + ip4_src_fib_lookup_one (lgm, src_fib_index1, + &ip1->src_address, &src_adj_index1); + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; + src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1); + next1 = src_adj1->lookup_next_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + src_adj1->rewrite_header.sw_if_index; + } + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + ip4_header_t * ip0; + u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0; + u32 next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; + ip_adjacency_t * dst_adj0, * src_adj0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip4 lookup */ + dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0); + src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; + + /* if default route not hit in ip4 lookup */ + if (PREDICT_TRUE(src_fib_index0 != (u32 ) ~0)) + { + /* do src lookup */ + ip4_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address, + &src_adj_index0); + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); + next0 = src_adj0->lookup_next_index; + + /* prepare packet for lisp-gpe output node */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (lgpe_ip4_lookup_node) = { + .function = lgpe_ip4_lookup, + .name = "lgpe-ip4-lookup", + .vector_size = sizeof (u32), + + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LGPE_IP4_LOOKUP_N_NEXT, + .next_nodes = { +#define _(sym,str) [LGPE_IP4_LOOKUP_NEXT_##sym] = str, + foreach_lgpe_ip4_lookup_next +#undef _ + }, +}; + +static u32 +ip6_src_fib_lookup (lisp_gpe_main_t * lgm, u32 src_fib_index, + ip6_address_t * src) +{ + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + ip6_src_fib_t * fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index); + + len = vec_len (fib->prefix_lengths_in_search_order); + + for (i = 0; i < len; i++) + { + int dst_address_length = fib->prefix_lengths_in_search_order[i]; + ip6_address_t * mask; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + + mask = &fib->fib_masks[dst_address_length]; + + kv.key[0] = src->as_u64[0] & mask->as_u64[0]; + kv.key[1] = src->as_u64[1] & mask->as_u64[1]; + kv.key[2] = dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&fib->ip6_lookup_table, &kv, &value); + if (rv == 0) + return value.value; + } + + return 0; +} + +always_inline uword +lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + lisp_gpe_main_t * lgm = &lisp_gpe_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip6_header_t * ip0, * ip1; + u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1, + src_adj_index1, src_fib_index1; + ip_adjacency_t * dst_adj0, * src_adj0, * dst_adj1, 
* src_adj1; + u32 next0, next1; + + next0 = next1 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* dst lookup was done by ip6 lookup */ + dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + dst_adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + + dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0); + dst_adj1 = ip_get_adjacency (lgm->lm6, dst_adj_index1); + + src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; + src_fib_index1 = dst_adj1->rewrite_header.sw_if_index; + + /* if default route not hit in ip6 lookup */ + if (PREDICT_TRUE(src_fib_index0 != (u32) ~0 + && src_fib_index1 != (u32) ~0)) + { + /* do src lookup */ + src_adj_index0 = ip6_src_fib_lookup (lgm, src_fib_index0, + &ip0->src_address); + src_adj_index1 = ip6_src_fib_lookup (lgm, src_fib_index1, + &ip1->src_address); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; + + src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); + src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1); + + next0 = src_adj0->lookup_next_index; + next1 = src_adj1->lookup_next_index; + + /* prepare buffer for lisp-gpe output node */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + src_adj1->rewrite_header.sw_if_index; + } + else 
+ { + if (src_fib_index0 != (u32) ~0) + { + src_adj_index0 = ip6_src_fib_lookup (lgm, src_fib_index0, + &ip0->src_address); + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); + next0 = src_adj0->lookup_next_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; + } + if (src_fib_index1 != (u32) ~0) + { + src_adj_index1 = ip6_src_fib_lookup (lgm, src_fib_index1, + &ip1->src_address); + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; + src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1); + next1 = src_adj1->lookup_next_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = + src_adj1->rewrite_header.sw_if_index; + } + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + ip6_header_t * ip0; + u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0; + u32 next0 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP; + ip_adjacency_t * dst_adj0, * src_adj0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip6 lookup */ + dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0); + src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; + + /* if default route not hit in ip6 lookup */ + if (PREDICT_TRUE(src_fib_index0 != (u32 ) ~0)) + { + /* do src lookup */ + src_adj_index0 = ip6_src_fib_lookup (lgm, src_fib_index0, + &ip0->src_address); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; + src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); + next0 = src_adj0->lookup_next_index; + + /* prepare packet for lisp-gpe output node */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + src_adj0->rewrite_header.sw_if_index; 
+ } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (lgpe_ip6_lookup_node) = { + .function = lgpe_ip6_lookup, + .name = "lgpe-ip6-lookup", + .vector_size = sizeof (u32), + + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LGPE_IP6_LOOKUP_N_NEXT, + .next_nodes = { +#define _(sym,str) [LGPE_IP6_LOOKUP_NEXT_##sym] = str, + foreach_lgpe_ip6_lookup_next +#undef _ + }, +}; diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c index a1d4b6fb..129bfc4c 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe.c @@ -17,585 +17,69 @@ lisp_gpe_main_t lisp_gpe_main; -/* avoids calling route callbacks for src fib */ -static void -ip4_sd_fib_set_adj_index (lisp_gpe_main_t * lgm, ip4_fib_t * fib, u32 flags, - u32 dst_address_u32, u32 dst_address_length, - u32 adj_index) -{ - ip_lookup_main_t * lm = lgm->lookup_main; - uword * hash; - - if (vec_bytes(fib->old_hash_values)) - memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); - if (vec_bytes(fib->new_hash_values)) - memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); - fib->new_hash_values[0] = adj_index; - - /* Make sure adj index is valid. 
*/ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - hash = _hash_set3 (hash, dst_address_u32, - fib->new_hash_values, - fib->old_hash_values); - - fib->adj_index_by_dst_address[dst_address_length] = hash; -} - -/* copied from ip4_forward since it's static */ -static void -ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, - ip4_fib_t * fib, - u32 address_length) -{ - hash_t * h; - uword max_index; - - ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); - lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword); - - fib->adj_index_by_dst_address[address_length] = - hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword)); - - hash_set_flags (fib->adj_index_by_dst_address[address_length], - HASH_FLAG_NO_AUTO_SHRINK); - - h = hash_header (fib->adj_index_by_dst_address[address_length]); - max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; - - /* Initialize new/old hash value vectors. */ - vec_validate_init_empty (fib->new_hash_values, max_index, ~0); - vec_validate_init_empty (fib->old_hash_values, max_index, ~0); -} - -void -ip4_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm, - ip4_add_del_route_args_t * a) +static int +lisp_gpe_rewrite (lisp_gpe_tunnel_t * t) { - ip_lookup_main_t * lm = lgm->lookup_main; - ip4_fib_t * fib; - u32 dst_address, dst_address_length, adj_index, old_adj_index; - uword * hash, is_del; - - /* Either create new adjacency or use given one depending on arguments. */ - if (a->n_add_adj > 0) - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - else - adj_index = a->adj_index; - - dst_address = a->dst_address.data_u32; - dst_address_length = a->dst_address_length; - - fib = pool_elt_at_index(lgm->src_fibs, a->table_index_or_table_id); - - if (! 
fib->adj_index_by_dst_address[dst_address_length]) - ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; + u8 *rw = 0; + lisp_gpe_header_t * lisp0; + int len; - if (is_del) + if (ip_addr_version(&t->src) == IP4) { - fib->old_hash_values[0] = ~0; - hash = _hash_unset (hash, dst_address, fib->old_hash_values); - fib->adj_index_by_dst_address[dst_address_length] = hash; - } - else - ip4_sd_fib_set_adj_index (lgm, fib, a->flags, dst_address, - dst_address_length, adj_index); + ip4_header_t * ip0; + ip4_udp_lisp_gpe_header_t * h0; + len = sizeof(*h0); - old_adj_index = fib->old_hash_values[0]; + vec_validate_aligned(rw, len - 1, CLIB_CACHE_LINE_BYTES); - ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, - is_del ? old_adj_index : adj_index, - is_del); + h0 = (ip4_udp_lisp_gpe_header_t *) rw; - /* Delete old adjacency index if present and changed. */ - if (! 
(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); -} - -void * -ip4_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index, - ip4_address_t * src, u32 address_length) -{ - ip4_fib_t * fib = pool_elt_at_index (lgm->src_fibs, src_fib_index); - uword * hash, * p; + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_UDP; - hash = fib->adj_index_by_dst_address[address_length]; - p = hash_get (hash, src->as_u32); - return (void *) p; -} + /* we fix up the ip4 header length and checksum after-the-fact */ + ip_address_copy_addr(&ip0->src_address, &t->src); + ip_address_copy_addr(&ip0->dst_address, &t->dst); + ip0->checksum = ip4_header_checksum (ip0); -typedef CLIB_PACKED (struct { - ip4_address_t address; - u32 address_length : 6; - u32 index : 26; -}) ip4_route_t; + /* UDP header, randomize src port on something, maybe? 
*/ + h0->udp.src_port = clib_host_to_net_u16 (4341); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); -static void -ip4_sd_fib_clear_src_fib (lisp_gpe_main_t * lgm, ip4_fib_t * fib) -{ - ip4_route_t * routes = 0, * r; - u32 i; - - vec_reset_length (routes); - - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) { - uword * hash = fib->adj_index_by_dst_address[i]; - hash_pair_t * p; - ip4_route_t x; - - x.address_length = i; - - hash_foreach_pair (p, hash, - ({ - x.address.data_u32 = p->key; - vec_add1 (routes, x); - })); - } - - vec_foreach (r, routes) { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = fib - lgm->src_fibs; - a.dst_address = r->address; - a.dst_address_length = r->address_length; - a.adj_index = ~0; - - ip4_sd_fib_add_del_src_route (lgm, &a); - } -} - -int -ip4_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add) -{ - uword * p; - ip4_add_del_route_args_t a; - ip_adjacency_t * dst_adjp, dst_adj; - ip4_address_t dst = ip_prefix_v4(dst_prefix), src; - u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; - ip4_fib_t * src_fib; - - if (src_prefix) - { - src = ip_prefix_v4(src_prefix); - src_address_length = ip_prefix_len(src_prefix); - } - else - memset(&src, 0, sizeof(src)); - - /* lookup dst adj */ - p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); - - if (is_add) - { - /* insert dst prefix to ip4 fib, if it's not in yet */ - if (p == 0) - { - /* dst adj should point to lisp gpe lookup */ - dst_adj = add_adj[0]; - dst_adj.lookup_next_index = lgm->ip4_lookup_next_lgpe_ip4_lookup; - - memset(&a, 0, sizeof(a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = 
dst; - a.flags |= IP4_ROUTE_FLAG_ADD; - a.add_adj = &dst_adj; - a.n_add_adj = 1; - - ip4_add_del_route (lgm->im4, &a); - - /* lookup dst adj to obtain the adj index */ - p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, - dst_address_length); - if (p == 0) - { - clib_warning("Failed to insert dst route for eid %U!", - format_ip4_address_and_length, dst.as_u8, - dst_address_length); - return -1; - } - - /* allocate and init src ip4 fib */ - pool_get(lgm->src_fibs, src_fib); - ip4_mtrie_init (&src_fib->mtrie); - - /* reuse rewrite header to store pointer to src fib */ - dst_adjp = ip_get_adjacency (lgm->lookup_main, p[0]); - dst_adjp->rewrite_header.sw_if_index = src_fib - lgm->src_fibs; - } + /* LISP-gpe header */ + lisp0 = &h0->lisp; } else { - if (p == 0) - { - clib_warning("Trying to delete inexistent dst route for %U. Aborting", - format_ip4_address_and_length, dst.as_u8, - dst_address_length); - return -1; - } - } - - dst_adjp = ip_get_adjacency (lgm->lookup_main, p[0]); - - /* add/del src prefix to src fib */ - memset(&a, 0, sizeof(a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index; - a.adj_index = ~0; - a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL; - a.add_adj = add_adj; - a.n_add_adj = 1; - /* if src prefix is null, add 0/0 */ - a.dst_address_length = src_address_length; - a.dst_address = src; - ip4_sd_fib_add_del_src_route (lgm, &a); - - /* if a delete, check if there are elements left in the src fib */ - if (!is_add) - { - src_fib = pool_elt_at_index(lgm->src_fibs, - dst_adjp->rewrite_header.sw_if_index); - if (!src_fib) - return 0; - - /* if there's nothing left, clear src fib .. */ - if (ARRAY_LEN(src_fib->adj_index_by_dst_address) == 0) - { - ip4_sd_fib_clear_src_fib (lgm, src_fib); - pool_put(lgm->src_fibs, src_fib); - } - - /* .. 
and remove dst route */ - memset(&a, 0, sizeof(a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = dst; - a.flags |= IP4_ROUTE_FLAG_DEL; - - ip4_add_del_route (lgm->im4, &a); - } - - return 0; -} - -static void * -ip4_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id) -{ - uword * p; - ip4_address_t dst = ip_prefix_v4(dst_prefix), src; - u32 dst_address_length = ip_prefix_len(dst_prefix), src_address_length = 0; - ip_adjacency_t * dst_adj; - - if (src_prefix) - { - src = ip_prefix_v4(src_prefix); - src_address_length = ip_prefix_len(src_prefix); - } - else - memset(&src, 0, sizeof(src)); + ip6_header_t * ip0; + ip6_udp_lisp_gpe_header_t * h0; + len = sizeof(*h0); - /* lookup dst adj */ - p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); - if (p == 0) - return p; + vec_validate_aligned(rw, len - 1, CLIB_CACHE_LINE_BYTES); - dst_adj = ip_get_adjacency (lgm->lookup_main, p[0]); - return ip4_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src, - src_address_length); -} + h0 = (ip6_udp_lisp_gpe_header_t *) rw; -typedef enum -{ - LGPE_IP4_LOOKUP_NEXT_DROP, - LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP, - LGPE_IP4_LOOKUP_N_NEXT, -} lgpe_ip4_lookup_next_t; - -always_inline void -ip4_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0, - ip4_address_t * addr0, u32 * src_adj_index0) -{ - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_fib_mtrie_t * mtrie0; + /* Fixed portion of the (outer) ip6 header */ + ip0 = &h0->ip6; + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + ip0->hop_limit = 254; + ip0->protocol = IP_PROTOCOL_UDP; - mtrie0 = &vec_elt_at_index(lgm->src_fibs, src_fib_index0)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf0 = 
ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); -} - -always_inline void -ip4_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0, - u32 src_fib_index1, ip4_address_t * addr0, - ip4_address_t * addr1, u32 * src_adj_index0, - u32 * src_adj_index1) -{ - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - - mtrie0 = &vec_elt_at_index(lgm->src_fibs, src_fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index(lgm->src_fibs, src_fib_index1)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); - - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); -} - -always_inline uword -lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, * from, * to_next; - lisp_gpe_main_t * lgm = &lisp_gpe_main; + /* we fix up the ip6 header length after-the-fact */ + ip_address_copy_addr(&ip0->src_address, &t->src); + ip_address_copy_addr(&ip0->dst_address, &t->dst); - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; + /* UDP header, randomize src port on something, maybe? */ + h0->udp.src_port = clib_host_to_net_u16 (4341); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - ip4_header_t * ip0, * ip1; - u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1, - src_adj_index1, src_fib_index1; - ip_adjacency_t * dst_adj0, * src_adj0, * dst_adj1, * src_adj1; - u32 next0, next1; - - next0 = next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* dst lookup was done by ip4 lookup */ - dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - dst_adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; - - dst_adj0 = ip_get_adjacency (lgm->lookup_main, dst_adj_index0); - dst_adj1 = ip_get_adjacency (lgm->lookup_main, dst_adj_index1); - - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - src_fib_index1 = dst_adj1->rewrite_header.sw_if_index; - - /* if default route not hit in ip4 lookup */ - if (PREDICT_TRUE(src_fib_index0 != (u32) ~0 - && src_fib_index1 != (u32) ~0)) - { - ip4_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1, - &ip0->src_address, &ip1->src_address, - &src_adj_index0, &src_adj_index1); - - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; - - src_adj0 = ip_get_adjacency (lgm->lookup_main, src_adj_index0); - src_adj1 = ip_get_adjacency (lgm->lookup_main, src_adj_index1); - - next0 = src_adj0->lookup_next_index; - next1 = src_adj1->lookup_next_index; - - /* prepare buffer for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = - src_adj1->rewrite_header.sw_if_index; - } - else - { - if (src_fib_index0 != (u32) ~0) - { - ip4_src_fib_lookup_one (lgm, src_fib_index0, - &ip0->src_address, &src_adj_index0); - 
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lookup_main, - src_adj_index0); - next0 = src_adj0->lookup_next_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = src_adj_index0; - } - if (src_fib_index1 != (u32) ~0) - { - ip4_src_fib_lookup_one (lgm, src_fib_index1, - &ip1->src_address, &src_adj_index1); - vnet_buffer(b1)->ip.adj_index[VLIB_TX] = src_adj_index1; - src_adj1 = ip_get_adjacency (lgm->lookup_main, - src_adj_index1); - next1 = src_adj1->lookup_next_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = src_adj_index1; - } - } - - vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t * b0; - ip4_header_t * ip0; - u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0; - u32 next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - ip_adjacency_t * dst_adj0, * src_adj0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* dst lookup was done by ip4 lookup */ - dst_adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - dst_adj0 = ip_get_adjacency (lgm->lookup_main, dst_adj_index0); - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - - /* default route hit in ip4 lookup, send to lisp control plane */ - if (src_fib_index0 == (u32) ~0) - goto done; - - /* src lookup we do here */ - ip4_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address, - &src_adj_index0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lookup_main, src_adj_index0); - next0 = src_adj0->lookup_next_index; - - /* prepare packet for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - done: - vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } 
- vlib_put_next_frame (vm, node, next_index, n_left_to_next); + /* LISP-gpe header */ + lisp0 = &h0->lisp; } - return from_frame->n_vectors; -} - - -VLIB_REGISTER_NODE (lgpe_ip4_lookup_node) = { - .function = lgpe_ip4_lookup, - .name = "lgpe-ip4-lookup", - .vector_size = sizeof (u32), - - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_next_nodes = LGPE_IP4_LOOKUP_N_NEXT, - .next_nodes = { - [LGPE_IP4_LOOKUP_NEXT_DROP] = "error-drop", - [LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP] = "lisp-cp-lookup", - }, -}; - -static int -lisp_gpe_rewrite (lisp_gpe_tunnel_t * t) -{ - u8 *rw = 0; - ip4_header_t * ip0; - lisp_gpe_header_t * lisp0; - ip4_udp_lisp_gpe_header_t * h0; - int len; - - len = sizeof(*h0); - - vec_validate_aligned(rw, len - 1, CLIB_CACHE_LINE_BYTES); - - h0 = (ip4_udp_lisp_gpe_header_t *) rw; - - /* Fixed portion of the (outer) ip4 header */ - ip0 = &h0->ip4; - ip0->ip_version_and_header_length = 0x45; - ip0->ttl = 254; - ip0->protocol = IP_PROTOCOL_UDP; - - /* we fix up the ip4 header length and checksum after-the-fact */ - ip0->src_address.as_u32 = t->src.as_u32; - ip0->dst_address.as_u32 = t->dst.as_u32; - ip0->checksum = ip4_header_checksum (ip0); - - /* UDP header, randomize src port on something, maybe? 
*/ - h0->udp.src_port = clib_host_to_net_u16 (4341); - h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); - - /* LISP-gpe header */ - lisp0 = &h0->lisp; lisp0->flags = t->flags; lisp0->ver_res = t->ver_res; @@ -636,8 +120,8 @@ add_del_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u32 * tun_index_res) lisp_gpe_tunnel_key_t key; memset(&key, 0, sizeof(key)); - gid_address_copy(&key.eid, &a->deid); - key.dst_loc = ip_addr_v4(&a->dlocator).as_u32; + ip_prefix_copy(&key.eid, &gid_address_ippref(&a->deid)); + ip_address_copy(&key.dst_loc, &a->dlocator); key.iid = clib_host_to_net_u32 (a->vni); p = mhash_get (&lgm->lisp_gpe_tunnel_by_key, &key); @@ -659,8 +143,8 @@ add_del_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u32 * tun_index_res) foreach_copy_field; #undef _ - t->src = ip_addr_v4(&a->slocator); - t->dst = ip_addr_v4(&a->dlocator); + ip_address_copy(&t->src, &a->slocator); + ip_address_copy(&t->dst, &a->dlocator); rv = lisp_gpe_rewrite (t); @@ -721,11 +205,12 @@ add_del_negative_fwd_entry (lisp_gpe_main_t * lgm, /* TODO insert tunnel that always sends map-request */ case DROP: /* for drop fwd entries, just add route, no need to add encap tunnel */ - adj.lookup_next_index = (u16) LGPE_IP4_LOOKUP_NEXT_DROP; + adj.lookup_next_index = ip_prefix_version(dpref) == IP4 ? 
+ LGPE_IP4_LOOKUP_NEXT_DROP : LGPE_IP6_LOOKUP_NEXT_DROP; /* add/delete route for prefix */ - return ip4_sd_fib_add_del_route (lgm, dpref, spref, a->table_id, &adj, - a->is_add); + return ip_sd_fib_add_del_route (lgm, dpref, spref, a->table_id, &adj, + a->is_add); break; default: return -1; @@ -738,10 +223,10 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, { lisp_gpe_main_t * lgm = &lisp_gpe_main; ip_adjacency_t adj, * adjp; - u32 * adj_index, rv, tun_index = ~0; + u32 adj_index, rv, tun_index = ~0; ip_prefix_t * dpref, * spref; - uword * lookup_next_index, * lgpe_sw_if_index; - + uword * lookup_next_index, * lgpe_sw_if_index, * lnip; + u8 ip_ver; /* treat negative fwd entries separately */ if (a->is_negative) return add_del_negative_fwd_entry (lgm, a); @@ -753,6 +238,7 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, dpref = &gid_address_ippref(&a->deid); spref = &gid_address_ippref(&a->seid); + ip_ver = ip_prefix_version(dpref); /* setup adjacency for eid */ memset (&adj, 0, sizeof(adj)); @@ -763,8 +249,10 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, { /* send packets that hit this adj to lisp-gpe interface output node in * requested vrf. */ - lookup_next_index = hash_get(lgm->lgpe_ip4_lookup_next_index_by_table_id, - a->table_id); + lnip = ip_ver == IP4 ? 
+ lgm->lgpe_ip4_lookup_next_index_by_table_id : + lgm->lgpe_ip6_lookup_next_index_by_table_id; + lookup_next_index = hash_get(lnip, a->table_id); lgpe_sw_if_index = hash_get(lgm->lisp_gpe_hw_if_index_by_table_id, a->table_id); @@ -779,16 +267,17 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, } /* add/delete route for prefix */ - rv = ip4_sd_fib_add_del_route (lgm, dpref, spref, a->table_id, &adj, - a->is_add); + rv = ip_sd_fib_add_del_route (lgm, dpref, spref, a->table_id, &adj, + a->is_add); /* check that everything worked */ if (CLIB_DEBUG && a->is_add) { - adj_index = ip4_sd_fib_get_route (lgm, dpref, spref, a->table_id); + adj_index = ip_sd_fib_get_route (lgm, dpref, spref, a->table_id); ASSERT(adj_index != 0); - adjp = ip_get_adjacency (lgm->lookup_main, adj_index[0]); + adjp = ip_get_adjacency ((ip_ver == IP4) ? lgm->lm4 : lgm->lm6, + adj_index); ASSERT(adjp != 0); ASSERT(adjp->rewrite_header.node_index == tun_index); @@ -908,8 +397,8 @@ format_lisp_gpe_tunnel (u8 * s, va_list * args) s = format (s, "[%d] %U (src) %U (dst) fibs: encap %d, decap %d", t - lgm->tunnels, - format_ip4_address, &t->src, - format_ip4_address, &t->dst, + format_ip_address, &t->src, + format_ip_address, &t->dst, t->encap_fib_index, t->decap_fib_index); @@ -966,6 +455,13 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) vnm->vlib_main, ip4_lookup_node.index, lgpe_ip4_lookup_node.index); } + /* add lgpe_ip6_lookup as possible next_node for ip6 lookup */ + if (lgm->ip6_lookup_next_lgpe_ip6_lookup == ~0) + { + lgm->ip6_lookup_next_lgpe_ip6_lookup = vlib_node_add_next ( + vnm->vlib_main, ip6_lookup_node.index, + lgpe_ip6_lookup_node.index); + } else { /* ask cp to re-add ifaces and defaults */ @@ -988,8 +484,9 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) vec_foreach(tunnel, tunnels) { memset(at, 0, sizeof(at[0])); at->is_add = 0; - gid_address_copy(&at->deid, &tunnel->eid); - 
ip_addr_v4(&at->dlocator).as_u32= tunnel->dst_loc; + gid_address_type(&at->deid) = IP_PREFIX; + ip_prefix_copy(&gid_address_ippref(&at->deid), &tunnel->eid); + ip_address_copy(&at->dlocator, &tunnel->dst_loc); vnet_lisp_gpe_add_del_fwd_entry (at, 0); } vec_free(tunnels); @@ -1061,14 +558,19 @@ lisp_gpe_init (vlib_main_t *vm) lgm->vnet_main = vnet_get_main(); lgm->vlib_main = vm; lgm->im4 = &ip4_main; - lgm->lookup_main = &ip4_main.lookup_main; + lgm->im6 = &ip6_main; + lgm->lm4 = &ip4_main.lookup_main; + lgm->lm6 = &ip6_main.lookup_main; lgm->ip4_lookup_next_lgpe_ip4_lookup = ~0; + lgm->ip6_lookup_next_lgpe_ip6_lookup = ~0; mhash_init (&lgm->lisp_gpe_tunnel_by_key, sizeof(uword), sizeof(lisp_gpe_tunnel_key_t)); udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, - lisp_gpe_input_node.index, 1 /* is_ip4 */); + lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */); + udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe6, + lisp_gpe_ip6_input_node.index, 0 /* is_ip4 */); return 0; } diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h index 1452b798..329083af 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe.h @@ -27,25 +27,29 @@ #include #include -#define IP_UDP_HDR_LEN (word) (sizeof(udp_header_t) + sizeof(ip4_header_t)) - typedef CLIB_PACKED (struct { ip4_header_t ip4; /* 20 bytes */ udp_header_t udp; /* 8 bytes */ lisp_gpe_header_t lisp; /* 8 bytes */ }) ip4_udp_lisp_gpe_header_t; +typedef CLIB_PACKED (struct { + ip6_header_t ip6; /* 40 bytes */ + udp_header_t udp; /* 8 bytes */ + lisp_gpe_header_t lisp; /* 8 bytes */ +}) ip6_udp_lisp_gpe_header_t; + typedef struct { union { struct - { - gid_address_t eid; - u32 dst_loc; - u32 iid; - }; - u8 as_u8[6]; + { + ip_prefix_t eid; /* within the dp only ip and mac can be eids */ + ip_address_t dst_loc; + u32 iid; + }; + u8 as_u8[40]; }; } lisp_gpe_tunnel_key_t; @@ -58,8 +62,8 @@ typedef struct u32 decap_next_index; /* tunnel src and dst addresses */ - ip4_address_t src; - ip4_address_t 
dst; + ip_address_t src; + ip_address_t dst; /* FIB indices */ u32 encap_fib_index; /* tunnel partner lookup here */ @@ -77,7 +81,7 @@ typedef struct u32 vni; } lisp_gpe_tunnel_t; -#define foreach_lisp_gpe_input_next \ +#define foreach_lisp_gpe_ip_input_next \ _(DROP, "error-drop") \ _(IP4_INPUT, "ip4-input") \ _(IP6_INPUT, "ip6-input") \ @@ -85,7 +89,7 @@ _(ETHERNET_INPUT, "ethernet-input") typedef enum { #define _(s,n) LISP_GPE_INPUT_NEXT_##s, - foreach_lisp_gpe_input_next + foreach_lisp_gpe_ip_input_next #undef _ LISP_GPE_INPUT_N_NEXT, } lisp_gpe_input_next_t; @@ -100,12 +104,28 @@ typedef enum { /* As a first step, reuse v4 fib. The goal of the typedef is to shield * consumers from future updates that may result in the lisp ip4 fib diverging * from ip4 fib */ -typedef ip4_fib_t lisp_ip4_fib_t; +typedef ip4_fib_t ip4_src_fib_t; + +typedef struct ip6_src_fib +{ + BVT(clib_bihash) ip6_lookup_table; + + /* bitmap/vector of mask widths to search */ + uword * non_empty_dst_address_length_bitmap; + u8 * prefix_lengths_in_search_order; + ip6_address_t fib_masks[129]; + i32 dst_address_length_refcounts[129]; + + /* ip6 lookup table config parameters */ + u32 lookup_table_nbuckets; + uword lookup_table_size; +} ip6_src_fib_t; typedef struct lisp_gpe_main { /* Pool of src fibs that are paired with dst fibs */ - ip4_fib_t * src_fibs; + ip4_src_fib_t * ip4_src_fibs; + ip6_src_fib_t * ip6_src_fibs; /* vector of encap tunnel instances */ lisp_gpe_tunnel_t * tunnels; @@ -123,26 +143,30 @@ typedef struct lisp_gpe_main /* Lookup lisp-gpe interfaces by vrf */ uword * lisp_gpe_hw_if_index_by_table_id; - /* Lookup lgpe_ip4_lookup_next by vrf */ + /* Lookup lgpe_ipX_lookup_next by vrf */ uword * lgpe_ip4_lookup_next_index_by_table_id; + uword * lgpe_ip6_lookup_next_index_by_table_id; - /* next node indexes that points ip4 lookup to lisp gpe lookup and lisp cp */ + /* next node indexes that point ip4/6 lookup to lisp gpe ip lookup */ u32 ip4_lookup_next_lgpe_ip4_lookup; + u32 
ip6_lookup_next_lgpe_ip6_lookup; /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; - ip_lookup_main_t * lookup_main; ip4_main_t * im4; + ip6_main_t * im6; + ip_lookup_main_t * lm4; + ip_lookup_main_t * lm6; } lisp_gpe_main_t; lisp_gpe_main_t lisp_gpe_main; extern vlib_node_registration_t lgpe_ip4_lookup_node; -extern vlib_node_registration_t lisp_gpe_input_node; +extern vlib_node_registration_t lgpe_ip6_lookup_node; +extern vlib_node_registration_t lisp_gpe_ip4_input_node; +extern vlib_node_registration_t lisp_gpe_ip6_input_node; -u8 * -format_lisp_gpe_tx_trace (u8 * s, va_list * args); u8 * format_lisp_gpe_header_with_length (u8 * s, va_list * args); @@ -202,7 +226,7 @@ typedef struct ip_address_t dlocator; u32 encap_fib_index; u32 decap_fib_index; - u32 decap_next_index; + u32 decap_next_index; /* TODO is this really needed? */ u8 flags; u8 ver_res; u8 res; @@ -215,7 +239,36 @@ int vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t *a, u32 * hw_if_indexp); -u8 * -format_lisp_gpe_header_with_length (u8 * s, va_list * args); +int +ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id, + ip_adjacency_t * add_adj, u8 is_add); +u32 +ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, + ip_prefix_t * src_prefix, u32 table_id); + +#define foreach_lgpe_ip4_lookup_next \ + _(DROP, "error-drop") \ + _(LISP_CP_LOOKUP, "lisp-cp-lookup") + +typedef enum lgpe_ip4_lookup_next +{ +#define _(sym,str) LGPE_IP4_LOOKUP_NEXT_##sym, + foreach_lgpe_ip4_lookup_next +#undef _ + LGPE_IP4_LOOKUP_N_NEXT, +} lgpe_ip4_lookup_next_t; + +#define foreach_lgpe_ip6_lookup_next \ + _(DROP, "error-drop") \ + _(LISP_CP_LOOKUP, "lisp-cp-lookup") + +typedef enum lgpe_ip6_lookup_next +{ +#define _(sym,str) LGPE_IP6_LOOKUP_NEXT_##sym, + foreach_lgpe_ip6_lookup_next +#undef _ + LGPE_IP6_LOOKUP_N_NEXT, +} lgpe_ip6_lookup_next_t; #endif /* included_vnet_lisp_gpe_h */ diff --git 
a/vpp/api/api.c b/vpp/api/api.c index 1b6d52be..bc2f64cd 100644 --- a/vpp/api/api.c +++ b/vpp/api/api.c @@ -4860,7 +4860,6 @@ send_lisp_gpe_tunnel_details (lisp_gpe_tunnel_t *tunnel, { vl_api_lisp_gpe_tunnel_details_t *rmp; lisp_gpe_main_t * lgm = &lisp_gpe_main; - ip4_address_t *ip4 = NULL; rmp = vl_msg_api_alloc (sizeof (*rmp)); memset (rmp, 0, sizeof (*rmp)); @@ -4868,12 +4867,9 @@ send_lisp_gpe_tunnel_details (lisp_gpe_tunnel_t *tunnel, rmp->tunnels = tunnel - lgm->tunnels; - /*list_gpe_tunnel now support only IPv4*/ - rmp->is_ipv6 = 0; - ip4 = &tunnel->src; - clib_memcpy(rmp->source_ip, ip4, sizeof(*ip4)); - ip4 = &tunnel->dst; - clib_memcpy(rmp->destination_ip, ip4, sizeof(*ip4)); + rmp->is_ipv6 = ip_addr_version(&tunnel->src) == IP6 ? 1 : 0; + ip_address_copy_addr(rmp->source_ip, &tunnel->src); + ip_address_copy_addr(rmp->destination_ip, &tunnel->dst); rmp->encap_fib_id = htonl(tunnel->encap_fib_index); rmp->decap_fib_id = htonl(tunnel->decap_fib_index); -- cgit 1.2.3-korg