From c42912de8e2873c6a107fee047773b13950a764b Mon Sep 17 00:00:00 2001 From: John Lo Date: Mon, 7 Nov 2016 18:30:47 -0500 Subject: VXLAN tunnel encap forwarding optimization with FIB 2.0 Optimize VXLAN encap node so that vxlan4-encap node is used for IP4 and vxlan6-encap node is used for IP6 underlay. Also stack the VXLAN encap nodes to the appropriate FIB IP4 or IP6 load-balance node instead of ip4/ip6-lookup node to save IP lookup operation. For VXLAN decap node, check VXLAN header FLAGS field for each packet and remove the code to support decap-next for IP4 or IP6. These decap-next values were intended for experimentation purposes and not needed any more since VXLAN-GPE tunnel is supported. The decap-next field is still kept in API for backward compatibility and its value has no effect. Decap next for both vxlan4-decap and vxlan6-decap nodes is always l2-input node. Change-Id: I8ac95774946549ec403ab691f999df0c006b460f Signed-off-by: John Lo --- vnet/vnet/fib/fib_node.h | 2 + vnet/vnet/vxlan/decap.c | 51 +++-- vnet/vnet/vxlan/encap.c | 497 ++++++++++++++++++++-------------------- vnet/vnet/vxlan/vxlan.c | 243 ++++++++++++++------ vnet/vnet/vxlan/vxlan.h | 55 +++-- vnet/vnet/vxlan/vxlan_error.def | 1 + vnet/vnet/vxlan/vxlan_packet.h | 11 +- 7 files changed, 490 insertions(+), 370 deletions(-) (limited to 'vnet') diff --git a/vnet/vnet/fib/fib_node.h b/vnet/vnet/fib/fib_node.h index c820546bcc2..791d63b9591 100644 --- a/vnet/vnet/fib/fib_node.h +++ b/vnet/vnet/fib/fib_node.h @@ -38,6 +38,7 @@ typedef enum fib_node_type_t_ { FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, FIB_NODE_TYPE_LISP_ADJ, FIB_NODE_TYPE_GRE_TUNNEL, + FIB_NODE_TYPE_VXLAN_TUNNEL, /** * Marker. New types before this one. leave the test last. 
*/ @@ -57,6 +58,7 @@ typedef enum fib_node_type_t_ { [FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY] = "lisp-gpe-fwd-entry", \ [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \ [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \ + [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel", \ } /** diff --git a/vnet/vnet/vxlan/decap.c b/vnet/vnet/vxlan/decap.c index 2b74ce22dfe..812a8416dc3 100644 --- a/vnet/vnet/vxlan/decap.c +++ b/vnet/vnet/vxlan/decap.c @@ -37,13 +37,13 @@ static u8 * format_vxlan_rx_trace (u8 * s, va_list * args) if (t->tunnel_index != ~0) { - s = format (s, "VXLAN: tunnel %d vni %d next %d error %d", + s = format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d", t->tunnel_index, t->vni, t->next_index, t->error); } else { - s = format (s, "VXLAN: no tunnel for vni %d next %d error %d", - t->vni, t->next_index, t->error); + s = format (s, "VXLAN decap error - tunnel for vni %d does not exist", + t->vni); } return s; } @@ -129,11 +129,13 @@ vxlan_input (vlib_main_t * vm, vxlan0 = vlib_buffer_get_current (b0); vxlan1 = vlib_buffer_get_current (b1); + next0 = next1 = VXLAN_INPUT_NEXT_L2_INPUT; + if (is_ip4) { - vlib_buffer_advance - (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); - vlib_buffer_advance - (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + vlib_buffer_advance + (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); ip4_0 = vlib_buffer_get_current (b0); ip4_1 = vlib_buffer_get_current (b1); } else { @@ -149,10 +151,10 @@ vxlan_input (vlib_main_t * vm, if (is_ip4) { vlib_buffer_advance (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0)); - vlib_buffer_advance + vlib_buffer_advance (b1, sizeof(*ip4_1)+sizeof(udp_header_t)+sizeof(*vxlan1)); } else { - vlib_buffer_advance + vlib_buffer_advance (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0)); vlib_buffer_advance (b1, sizeof(*ip6_1)+sizeof(udp_header_t)+sizeof(*vxlan1)); @@ -164,6 +166,13 @@ vxlan_input 
(vlib_main_t * vm, tunnel_index1 = ~0; error1 = 0; + if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I)) + { + error0 = VXLAN_ERROR_BAD_FLAGS; + next0 = VXLAN_INPUT_NEXT_DROP; + goto trace0; + } + if (is_ip4) { key4_0.src = ip4_0->src_address.as_u32; key4_0.vni = vxlan0->vni_reserved; @@ -209,7 +218,6 @@ vxlan_input (vlib_main_t * vm, t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0); - next0 = t0->decap_next_index; sw_if_index0 = t0->sw_if_index; len0 = vlib_buffer_length_in_chain (vm, b0); @@ -253,6 +261,12 @@ vxlan_input (vlib_main_t * vm, tr->vni = vnet_get_vni (vxlan0); } + if (PREDICT_FALSE (vxlan1->flags != VXLAN_FLAGS_I)) + { + error1 = VXLAN_ERROR_BAD_FLAGS; + next1 = VXLAN_INPUT_NEXT_DROP; + goto trace1; + } if (is_ip4) { key4_1.src = ip4_1->src_address.as_u32; @@ -299,7 +313,6 @@ vxlan_input (vlib_main_t * vm, t1 = pool_elt_at_index (vxm->tunnels, tunnel_index1); - next1 = t1->decap_next_index; sw_if_index1 = t1->sw_if_index; len1 = vlib_buffer_length_in_chain (vm, b1); @@ -376,9 +389,11 @@ vxlan_input (vlib_main_t * vm, /* udp leaves current_data pointing at the vxlan header */ vxlan0 = vlib_buffer_get_current (b0); + next0 = VXLAN_INPUT_NEXT_L2_INPUT; + if (is_ip4) { - vlib_buffer_advance - (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); + vlib_buffer_advance + (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t))); ip4_0 = vlib_buffer_get_current (b0); } else { vlib_buffer_advance @@ -391,13 +406,20 @@ vxlan_input (vlib_main_t * vm, vlib_buffer_advance (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0)); } else { - vlib_buffer_advance + vlib_buffer_advance (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0)); } tunnel_index0 = ~0; error0 = 0; + if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I)) + { + error0 = VXLAN_ERROR_BAD_FLAGS; + next0 = VXLAN_INPUT_NEXT_DROP; + goto trace00; + } + if (is_ip4) { key4_0.src = ip4_0->src_address.as_u32; key4_0.vni = vxlan0->vni_reserved; @@ -443,7 +465,6 @@ vxlan_input (vlib_main_t * vm, 
t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0); - next0 = t0->decap_next_index; sw_if_index0 = t0->sw_if_index; len0 = vlib_buffer_length_in_chain (vm, b0); diff --git a/vnet/vnet/vxlan/encap.c b/vnet/vnet/vxlan/encap.c index e7d49b027ce..5b63064a848 100644 --- a/vnet/vnet/vxlan/encap.c +++ b/vnet/vnet/vxlan/encap.c @@ -37,8 +37,6 @@ typedef enum { } vxlan_encap_error_t; typedef enum { - VXLAN_ENCAP_NEXT_IP4_LOOKUP, - VXLAN_ENCAP_NEXT_IP6_LOOKUP, VXLAN_ENCAP_NEXT_DROP, VXLAN_ENCAP_N_NEXT, } vxlan_encap_next_t; @@ -55,7 +53,8 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args) vxlan_encap_trace_t * t = va_arg (*args, vxlan_encap_trace_t *); - s = format (s, "VXLAN-ENCAP: tunnel %d vni %d", t->tunnel_index, t->vni); + s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d", + t->tunnel_index, t->vni); return s; } @@ -66,10 +65,11 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args) #define foreach_fixed_header6_offset \ _(0) _(1) _(2) _(3) _(4) _(5) _(6) -static uword -vxlan_encap (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +always_inline uword +vxlan_encap_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + u32 is_ip4) { u32 n_left_from, next_index, * from, * to_next; vxlan_main_t * vxm = &vxlan_main; @@ -79,6 +79,10 @@ vxlan_encap (vlib_main_t * vm, u16 old_l0 = 0, old_l1 = 0; u32 cpu_index = os_get_cpu_number(); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; + u32 sw_if_index0 = 0, sw_if_index1 = 0; + u32 next0 = 0, next1 = 0; + vnet_hw_interface_t * hi0, * hi1; + vxlan_tunnel_t * t0 = NULL, * t1 = NULL; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -99,10 +103,7 @@ vxlan_encap (vlib_main_t * vm, u32 bi0, bi1; vlib_buffer_t * b0, * b1; u32 flow_hash0, flow_hash1; - u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; - u32 next1 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; - u32 sw_if_index0, sw_if_index1, len0, len1; - vnet_hw_interface_t * hi0, * hi1; + u32 
len0, len1; ip4_header_t * ip4_0, * ip4_1; ip6_header_t * ip6_0, * ip6_1; udp_header_t * udp0, * udp1; @@ -110,10 +111,8 @@ vxlan_encap (vlib_main_t * vm, u64 * copy_src1, * copy_dst1; u32 * copy_src_last0, * copy_dst_last0; u32 * copy_src_last1, * copy_dst_last1; - vxlan_tunnel_t * t0, * t1; u16 new_l0, new_l1; ip_csum_t sum0, sum1; - u8 is_ip4_0, is_ip4_1; /* Prefetch next iteration. */ { @@ -144,169 +143,147 @@ vxlan_encap (vlib_main_t * vm, flow_hash0 = vnet_l2_compute_flow_hash (b0); flow_hash1 = vnet_l2_compute_flow_hash (b1); - /* 1-wide cache? */ - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; - sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; - hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); - hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); - - t0 = &vxm->tunnels[hi0->dev_instance]; - t1 = &vxm->tunnels[hi1->dev_instance]; - - is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4); - is_ip4_1 = (t1->flags & VXLAN_TUNNEL_IS_IPV4); - - if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; - if (PREDICT_FALSE(!is_ip4_1)) next1 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; + /* Get next node index and adj index from tunnel next_dpo */ + if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX]) + { + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + t0 = &vxm->tunnels[hi0->dev_instance]; + /* Note: change to always set next0 if it may be set to drop */ + next0 = t0->next_dpo.dpoi_next_node; + } + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index; - /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */ - /* IP6 VXLAN header sizeof(ip6_vxlan_header_t) should be 56 octects */ - if (PREDICT_TRUE(is_ip4_0)) - ASSERT(vec_len(t0->rewrite) == 36); - else - ASSERT(vec_len(t0->rewrite) == 56); - if (PREDICT_TRUE(is_ip4_1)) - ASSERT(vec_len(t1->rewrite) == 36); - else - ASSERT(vec_len(t1->rewrite) == 56); + /* Get next node index and adj index from tunnel next_dpo */ 
+ if (sw_if_index1 != vnet_buffer(b1)->sw_if_index[VLIB_TX]) + { + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX]; + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + t1 = &vxm->tunnels[hi1->dev_instance]; + /* Note: change to always set next1 if it may be set to drop */ + next1 = t1->next_dpo.dpoi_next_node; + } + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = t1->next_dpo.dpoi_index; /* Apply the rewrite string. $$$$ vnet_rewrite? */ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite)); - /* assign both v4 and v6; avoid a branch, optimizer will help us */ - ip4_0 = vlib_buffer_get_current(b0); - ip6_0 = (void *)ip4_0; - ip4_1 = vlib_buffer_get_current(b1); - ip6_1 = (void *)ip4_1; - - /* Copy the fixed header (v4 and v6 variables point to the same - * place at this point) - */ - copy_dst0 = (u64 *) ip4_0; - copy_src0 = (u64 *) t0->rewrite; - - copy_dst1 = (u64 *) ip4_1; - copy_src1 = (u64 *) t1->rewrite; - - /* Copy first 32 (ip4)/56 (ip6) octets 8-bytes at a time */ + if (is_ip4) + { + /* IP4 VXLAN header should be 36 octects */ + ASSERT(sizeof(ip4_vxlan_header_t) == 36); + ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t)); + ASSERT(vec_len(t1->rewrite) == sizeof(ip4_vxlan_header_t)); + + ip4_0 = vlib_buffer_get_current(b0); + ip4_1 = vlib_buffer_get_current(b1); + + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip4_0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst1 = (u64 *) ip4_1; + copy_src1 = (u64 *) t1->rewrite; + /* Copy first 32 octets 8-bytes at a time */ #define _(offs) copy_dst0[offs] = copy_src0[offs]; - if (PREDICT_TRUE(is_ip4_0)) { - foreach_fixed_header4_offset; - } else { - foreach_fixed_header6_offset; - } + foreach_fixed_header4_offset; #undef _ #define _(offs) copy_dst1[offs] = copy_src1[offs]; - if (PREDICT_TRUE(is_ip4_1)) { - foreach_fixed_header4_offset; - } else { - foreach_fixed_header6_offset; - } + foreach_fixed_header4_offset; #undef _ - /* Last 4 octets. 
Hopefully gcc will be our friend */ - if (PREDICT_TRUE(is_ip4_0)) { + /* Last 4 octets. Hopefully gcc will be our friend */ copy_dst_last0 = (u32 *)(&copy_dst0[4]); copy_src_last0 = (u32 *)(&copy_src0[4]); copy_dst_last0[0] = copy_src_last0[0]; - } - if (PREDICT_TRUE(is_ip4_1)) { copy_dst_last1 = (u32 *)(&copy_dst1[4]); copy_src_last1 = (u32 *)(&copy_src1[4]); copy_dst_last1[0] = copy_src_last1[0]; - } - if (PREDICT_TRUE(is_ip4_0)) { - /* fix the ing outer-IP checksum */ - sum0 = ip4_0->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + /* Fix the IP4 checksum and length */ + sum0 = ip4_0->checksum; + new_l0 = /* old_l0 always 0, see the rewrite setup */ + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip4_0->checksum = ip_csum_fold (sum0); - ip4_0->length = new_l0; - } else { - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof(*ip6_0)); - ip6_0->payload_length = new_l0; - } - - if (PREDICT_TRUE(is_ip4_1)) { - /* fix the ing outer-IP checksum */ - sum1 = ip4_1->checksum; - - /* old_l1 always 0, see the rewrite setup */ - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + length /* changed member */); + ip4_0->checksum = ip_csum_fold (sum0); + ip4_0->length = new_l0; + sum1 = ip4_1->checksum; + new_l1 = /* old_l1 always 0, see the rewrite setup */ + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */); - ip4_1->checksum = ip_csum_fold (sum1); - ip4_1->length = new_l1; - } else { - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof(*ip6_1)); - ip6_1->payload_length = new_l1; - } - - /* Fix UDP length */ - if (PREDICT_TRUE(is_ip4_0)) { - udp0 = (udp_header_t *)(ip4_0+1); - new_l0 = clib_host_to_net_u16 
(vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip4_0)); - } else { - udp0 = (udp_header_t *)(ip6_0+1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip6_0)); - } - if (PREDICT_TRUE(is_ip4_1)) { - udp1 = (udp_header_t *)(ip4_1+1); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip4_1)); - } else { - udp1 = (udp_header_t *)(ip6_1+1); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip6_1)); - } - - udp0->length = new_l0; - udp0->src_port = flow_hash0; - - udp1->length = new_l1; - udp1->src_port = flow_hash1; - - if (PREDICT_FALSE(!is_ip4_0)) { - int bogus = 0; - /* IPv6 UDP checksum is mandatory */ - udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, - ip6_0, &bogus); - ASSERT(bogus == 0); - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } - - if (PREDICT_FALSE(!is_ip4_1)) { - int bogus = 0; - /* IPv6 UDP checksum is mandatory */ - udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1, + length /* changed member */); + ip4_1->checksum = ip_csum_fold (sum1); + ip4_1->length = new_l1; + + /* Fix UDP length and set source port */ + udp0 = (udp_header_t *)(ip4_0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0) + - sizeof (*ip4_0)); + udp0->length = new_l0; + udp0->src_port = flow_hash0; + udp1 = (udp_header_t *)(ip4_1+1); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1) + - sizeof (*ip4_1)); + udp1->length = new_l1; + udp1->src_port = flow_hash1; + } + else /* ipv6 */ + { + int bogus = 0; + + /* IP6 VXLAN header should be 56 octects */ + ASSERT(sizeof(ip6_vxlan_header_t) == 56); + ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t)); + ASSERT(vec_len(t1->rewrite) == sizeof(ip6_vxlan_header_t)); + ip6_0 = vlib_buffer_get_current(b0); + ip6_1 = vlib_buffer_get_current(b1); + + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip6_0; + copy_src0 = (u64 *) t0->rewrite; + copy_dst1 = (u64 *) ip6_1; + 
copy_src1 = (u64 *) t1->rewrite; + /* Copy first 56 (ip6) octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header6_offset; +#undef _ +#define _(offs) copy_dst1[offs] = copy_src1[offs]; + foreach_fixed_header6_offset; +#undef _ + /* Fix IP6 payload length */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof(*ip6_0)); + ip6_0->payload_length = new_l0; + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof(*ip6_1)); + ip6_1->payload_length = new_l1; + + /* Fix UDP length and set source port */ + udp0 = (udp_header_t *)(ip6_0+1); + udp0->length = new_l0; + udp0->src_port = flow_hash0; + udp1 = (udp_header_t *)(ip6_1+1); + udp1->length = new_l1; + udp1->src_port = flow_hash1; + + /* IPv6 UDP checksum is mandatory */ + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, + ip6_0, &bogus); + ASSERT(bogus == 0); + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1, ip6_1, &bogus); - ASSERT(bogus == 0); - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } - - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; - vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; - vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0; - vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1; - pkts_encapsulated += 2; + ASSERT(bogus == 0); + if (udp1->checksum == 0) + udp1->checksum = 0xffff; + } + pkts_encapsulated += 2; len0 = vlib_buffer_length_in_chain (vm, b0); - len1 = vlib_buffer_length_in_chain (vm, b0); + len1 = vlib_buffer_length_in_chain (vm, b1); stats_n_packets += 2; stats_n_bytes += len0 + len1; @@ -367,18 +344,14 @@ vxlan_encap (vlib_main_t * vm, u32 bi0; vlib_buffer_t * b0; u32 flow_hash0; - u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP; - u32 sw_if_index0, len0; - vnet_hw_interface_t * hi0; + u32 len0; ip4_header_t * ip4_0; 
ip6_header_t * ip6_0; udp_header_t * udp0; u64 * copy_src0, * copy_dst0; u32 * copy_src_last0, * copy_dst_last0; - vxlan_tunnel_t * t0; u16 new_l0; ip_csum_t sum0; - u8 is_ip4_0; bi0 = from[0]; to_next[0] = bi0; @@ -391,102 +364,91 @@ vxlan_encap (vlib_main_t * vm, flow_hash0 = vnet_l2_compute_flow_hash(b0); - /* 1-wide cache? */ - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; - hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); - - t0 = &vxm->tunnels[hi0->dev_instance]; - - is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4); - - if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; - - /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octets */ - /* IP6 VXLAN header sizeof(ip4_vxlan_header_t) should be 56 octets */ - if (PREDICT_TRUE(is_ip4_0)) - ASSERT(vec_len(t0->rewrite) == 36); - else - ASSERT(vec_len(t0->rewrite) == 56); + /* Get next node index and adj index from tunnel next_dpo */ + if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX]) + { + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + t0 = &vxm->tunnels[hi0->dev_instance]; + /* Note: change to always set next0 if it may be set to drop */ + next0 = t0->next_dpo.dpoi_next_node; + } + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index; /* Apply the rewrite string. $$$$ vnet_rewrite? 
*/ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite)); - /* assign both v4 and v6; avoid a branch, optimizer will help us */ - ip4_0 = vlib_buffer_get_current(b0); - ip6_0 = (void *)ip4_0; + if (is_ip4) + { + /* IP4 VXLAN header should be 36 octects */ + ASSERT(sizeof(ip4_vxlan_header_t) == 36); + ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t)); + ip4_0 = vlib_buffer_get_current(b0); + + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip4_0; + copy_src0 = (u64 *) t0->rewrite; + /* Copy first 32 octets 8-bytes at a time */ +#define _(offs) copy_dst0[offs] = copy_src0[offs]; + foreach_fixed_header4_offset; +#undef _ + /* Last 4 octets. Hopefully gcc will be our friend */ + copy_dst_last0 = (u32 *)(&copy_dst0[4]); + copy_src_last0 = (u32 *)(&copy_src0[4]); + copy_dst_last0[0] = copy_src_last0[0]; - /* Copy the fixed header (v4 and v6 variables point to the same - * place at this point) - */ - copy_dst0 = (u64 *) ip4_0; - copy_src0 = (u64 *) t0->rewrite; + /* Fix the IP4 checksum and length */ + sum0 = ip4_0->checksum; + new_l0 = /* old_l0 always 0, see the rewrite setup */ + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip4_0->checksum = ip_csum_fold (sum0); + ip4_0->length = new_l0; + + /* Fix UDP length and set source port */ + udp0 = (udp_header_t *)(ip4_0+1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0) + - sizeof (*ip4_0)); + udp0->length = new_l0; + udp0->src_port = flow_hash0; + } - /* Copy first 32 octets 8-bytes at a time */ + else /* ip6 path */ + { + int bogus = 0; + + /* IP6 VXLAN header should be 56 octects */ + ASSERT(sizeof(ip6_vxlan_header_t) == 56); + ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t)); + ip6_0 = vlib_buffer_get_current(b0); + /* Copy the fixed header */ + copy_dst0 = (u64 *) ip6_0; + copy_src0 = (u64 *) t0->rewrite; + /* Copy first 56 (ip6) octets 8-bytes at a time */ #define _(offs) 
copy_dst0[offs] = copy_src0[offs]; - if (PREDICT_TRUE(is_ip4_0)) { - foreach_fixed_header4_offset; - } else { - foreach_fixed_header6_offset; - } + foreach_fixed_header6_offset; #undef _ - if (PREDICT_TRUE(is_ip4_0)) { - /* Last 4 octets. Hopefully gcc will be our friend */ - copy_dst_last0 = (u32 *)(&copy_dst0[4]); - copy_src_last0 = (u32 *)(&copy_src0[4]); - - copy_dst_last0[0] = copy_src_last0[0]; - } - - if (PREDICT_TRUE(is_ip4_0)) { - /* fix the ing outer-IP checksum */ - sum0 = ip4_0->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip4_0->checksum = ip_csum_fold (sum0); - ip4_0->length = new_l0; - } else { - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof(*ip6_0)); - ip6_0->payload_length = new_l0; - } - - /* Fix UDP length */ - if (PREDICT_TRUE(is_ip4_0)) { - udp0 = (udp_header_t *)(ip4_0+1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip4_0)); - } else { - udp0 = (udp_header_t *)(ip6_0+1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip6_0)); - } - - udp0->length = new_l0; - udp0->src_port = flow_hash0; - - if (PREDICT_FALSE(!is_ip4_0)) { - int bogus = 0; - /* IPv6 UDP checksum is mandatory */ - udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, - ip6_0, &bogus); - ASSERT(bogus == 0); - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } - - - /* vnet_update_l2_len (b0); do we need this? 
cluke */ - - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; - vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0; - pkts_encapsulated ++; + /* Fix IP6 payload length */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof(*ip6_0)); + ip6_0->payload_length = new_l0; + + /* Fix UDP length and set source port */ + udp0 = (udp_header_t *)(ip6_0+1); + udp0->length = new_l0; + udp0->src_port = flow_hash0; + + /* IPv6 UDP checksum is mandatory */ + udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, + ip6_0, &bogus); + ASSERT(bogus == 0); + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + } + pkts_encapsulated ++; len0 = vlib_buffer_length_in_chain (vm, b0); stats_n_packets += 1; stats_n_bytes += len0; @@ -541,24 +503,51 @@ vxlan_encap (vlib_main_t * vm, return from_frame->n_vectors; } -VLIB_REGISTER_NODE (vxlan_encap_node) = { - .function = vxlan_encap, - .name = "vxlan-encap", +static uword +vxlan4_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1); +} + +static uword +vxlan6_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0); +} + +VLIB_REGISTER_NODE (vxlan4_encap_node) = { + .function = vxlan4_encap, + .name = "vxlan4-encap", .vector_size = sizeof (u32), .format_trace = format_vxlan_encap_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(vxlan_encap_error_strings), .error_strings = vxlan_encap_error_strings, - .n_next_nodes = VXLAN_ENCAP_N_NEXT, + .next_nodes = { + [VXLAN_ENCAP_NEXT_DROP] = "error-drop", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_encap_node, vxlan4_encap) + +VLIB_REGISTER_NODE (vxlan6_encap_node) = { + .function = vxlan6_encap, + .name = "vxlan6-encap", + .vector_size = sizeof (u32), + .format_trace = 
format_vxlan_encap_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(vxlan_encap_error_strings), + .error_strings = vxlan_encap_error_strings, + .n_next_nodes = VXLAN_ENCAP_N_NEXT, .next_nodes = { - [VXLAN_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [VXLAN_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup", [VXLAN_ENCAP_NEXT_DROP] = "error-drop", }, }; -VLIB_NODE_FUNCTION_MULTIARCH (vxlan_encap_node, vxlan_encap) +VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_encap_node, vxlan6_encap) diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c index 9ec4c74157f..d37e9d688f1 100644 --- a/vnet/vnet/vxlan/vxlan.c +++ b/vnet/vnet/vxlan/vxlan.c @@ -14,6 +14,8 @@ */ #include #include +#include +#include /** * @file @@ -34,39 +36,18 @@ vxlan_main_t vxlan_main; -static u8 * format_decap_next (u8 * s, va_list * args) -{ - u32 next_index = va_arg (*args, u32); - - switch (next_index) - { - case VXLAN_INPUT_NEXT_DROP: - return format (s, "drop"); - case VXLAN_INPUT_NEXT_L2_INPUT: - return format (s, "l2"); - case VXLAN_INPUT_NEXT_IP4_INPUT: - return format (s, "ip4"); - case VXLAN_INPUT_NEXT_IP6_INPUT: - return format (s, "ip6"); - default: - return format (s, "unknown %d", next_index); - } - return s; -} - u8 * format_vxlan_tunnel (u8 * s, va_list * args) { vxlan_tunnel_t * t = va_arg (*args, vxlan_tunnel_t *); vxlan_main_t * ngm = &vxlan_main; s = format (s, - "[%d] %U (src) %U (dst) vni %d encap_fib_index %d", + "[%d] src %U dst %U vni %d encap_fib_index %d sw_if_index %d " + "fib_entry_index %d\n", t - ngm->tunnels, format_ip46_address, &t->src, IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY, - t->vni, - t->encap_fib_index); - s = format (s, " decap_next %U\n", format_decap_next, t->decap_next_index); + t->vni, t->encap_fib_index, t->sw_if_index, t->fib_entry_index); return s; } @@ -116,10 +97,85 @@ VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = { .build_rewrite = default_build_rewrite, }; + +static vxlan_tunnel_t * +vxlan_tunnel_from_fib_node (fib_node_t *node) +{ +#if 
(CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type); +#endif + return ((vxlan_tunnel_t*) (((char*)node) - + STRUCT_OFFSET_OF(vxlan_tunnel_t, node))); +} + +/** + * Function definition to backwalk a FIB node - + * Here we will restack the new dpo of VXLAN DIP to encap node. + */ +static fib_node_back_walk_rc_t +vxlan_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + vxlan_tunnel_t *t = vxlan_tunnel_from_fib_node(node); + dpo_id_t dpo = DPO_INVALID; + + if (ip46_address_is_ip4(&t->dst)) { + fib_entry_contribute_forwarding + (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo); + dpo_stack_from_node + (vxlan4_encap_node.index, &t->next_dpo, &dpo); + } else { + fib_entry_contribute_forwarding + (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP6, &dpo); + dpo_stack_from_node + (vxlan6_encap_node.index, &t->next_dpo, &dpo); + } + dpo_reset(&dpo); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t* +vxlan_tunnel_fib_node_get (fib_node_index_t index) +{ + vxlan_tunnel_t * t; + vxlan_main_t * vxm = &vxlan_main; + + t = pool_elt_at_index(vxm->tunnels, index); + + return (&t->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +vxlan_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The VXLAN tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by VXLAN tunnels + * for participation in the FIB object graph. 
+ */ +const static fib_node_vft_t vxlan_vft = { + .fnv_get = vxlan_tunnel_fib_node_get, + .fnv_last_lock = vxlan_tunnel_last_lock_gone, + .fnv_back_walk = vxlan_tunnel_back_walk, +}; + + #define foreach_copy_field \ _(vni) \ -_(encap_fib_index) \ -_(decap_next_index) +_(encap_fib_index) #define foreach_copy_ipv4 { \ _(src.ip4.as_u32) \ @@ -205,8 +261,6 @@ int vnet_vxlan_add_del_tunnel vxlan_main_t * vxm = &vxlan_main; vxlan_tunnel_t *t = 0; vnet_main_t * vnm = vxm->vnet_main; - ip4_main_t * im4 = &ip4_main; - ip6_main_t * im6 = &ip6_main; vnet_hw_interface_t * hi; uword * p; u32 hw_if_index = ~0; @@ -214,8 +268,9 @@ int vnet_vxlan_add_del_tunnel int rv; vxlan4_tunnel_key_t key4; vxlan6_tunnel_key_t key6; + u32 is_ip6 = a->is_ip6; - if (!a->is_ip6) { + if (!is_ip6) { key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */ key4.vni = clib_host_to_net_u32 (a->vni << 8); @@ -230,28 +285,24 @@ int vnet_vxlan_add_del_tunnel if (a->is_add) { + l2input_main_t * l2im = &l2input_main; + /* adding a tunnel: tunnel must not already exist */ if (p) return VNET_API_ERROR_TUNNEL_EXIST; - if (a->decap_next_index == ~0) - a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT; - - if (a->decap_next_index >= VXLAN_INPUT_N_NEXT) - return VNET_API_ERROR_INVALID_DECAP_NEXT; - pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES); memset (t, 0, sizeof (*t)); /* copy from arg structure */ #define _(x) t->x = a->x; foreach_copy_field; - if (!a->is_ip6) foreach_copy_ipv4 - else foreach_copy_ipv6 + if (!is_ip6) foreach_copy_ipv4 + else foreach_copy_ipv6 #undef _ /* copy the key */ - if (a->is_ip6) + if (is_ip6) { t->key6 = clib_mem_alloc (sizeof(vxlan6_tunnel_key_t)); clib_memcpy (t->key6, &key6, sizeof(key6)); @@ -261,9 +312,7 @@ int vnet_vxlan_add_del_tunnel t->key4 = 0; /* not yet used */ } - if (!a->is_ip6) t->flags |= VXLAN_TUNNEL_IS_IPV4; - - if (!a->is_ip6) { + if (!is_ip6) { rv = vxlan4_rewrite (t); } else { rv = vxlan6_rewrite (t); @@ -275,7 +324,7 @@ int 
vnet_vxlan_add_del_tunnel return rv; } - if (!a->is_ip6) + if (!is_ip6) hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels); else hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels); @@ -308,7 +357,6 @@ int vnet_vxlan_add_del_tunnel (vnm, vxlan_device_class.index, t - vxm->tunnels, vxlan_hw_class.index, t - vxm->tunnels); hi = vnet_get_hw_interface (vnm, hw_if_index); - hi->output_node_index = vxlan_encap_node.index; } t->hw_if_index = hw_if_index; @@ -317,26 +365,73 @@ int vnet_vxlan_add_del_tunnel vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index, ~0); vxm->tunnel_index_by_sw_if_index[sw_if_index] = t - vxm->tunnels; - if (a->decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT) - { - l2input_main_t * l2im = &l2input_main; - /* setup l2 input config with l2 feature and bd 0 to drop packet */ - vec_validate (l2im->configs, sw_if_index); - l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; - l2im->configs[sw_if_index].bd_index = 0; - } + /* setup l2 input config with l2 feature and bd 0 to drop packet */ + vec_validate (l2im->configs, sw_if_index); + l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; + l2im->configs[sw_if_index].bd_index = 0; vnet_sw_interface_set_flags (vnm, sw_if_index, VNET_SW_INTERFACE_FLAG_ADMIN_UP); - if (!a->is_ip6) { - vec_validate (im4->fib_index_by_sw_if_index, sw_if_index); - im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index; - ip4_sw_interface_enable_disable(sw_if_index, 1); - } else { - vec_validate (im6->fib_index_by_sw_if_index, sw_if_index); - im6->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index; - ip6_sw_interface_enable_disable(sw_if_index, 1); - } + /* + * source the FIB entry for the tunnel's destination + * and become a child thereof. 
The tunnel will then get poked + * when the forwarding for the entry updates, and the tunnel can + * re-stack accordingly + */ + fib_node_init(&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL); + if (!is_ip6) + { + dpo_id_t dpo = DPO_INVALID; + const fib_prefix_t tun_dst_pfx = + { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = + { + .ip4 = t->dst.ip4, + } + }; + + t->fib_entry_index = fib_table_entry_special_add + (t->encap_fib_index, &tun_dst_pfx, + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); + t->sibling_index = fib_entry_child_add + (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels); + fib_entry_contribute_forwarding + (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo); + dpo_stack_from_node (vxlan4_encap_node.index, &t->next_dpo, &dpo); + dpo_reset(&dpo); + + /* Set vxlan tunnel output node to ip4 version */ + hi->output_node_index = vxlan4_encap_node.index; + } + else + { + dpo_id_t dpo = DPO_INVALID; + const fib_prefix_t tun_dst_pfx = + { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = + { + .ip6 = t->dst.ip6, + } + }; + + t->fib_entry_index = fib_table_entry_special_add + (t->encap_fib_index, &tun_dst_pfx, + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); + t->sibling_index = fib_entry_child_add + (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels); + fib_entry_contribute_forwarding + (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP6, &dpo); + dpo_stack_from_node + (vxlan6_encap_node.index, &t->next_dpo, &dpo); + dpo_reset(&dpo); + + /* Set vxlan tunnel output node to ip6 version */ + hi->output_node_index = vxlan6_encap_node.index; + } } else { @@ -353,16 +448,18 @@ int vnet_vxlan_add_del_tunnel vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0; - if (!a->is_ip6) + fib_entry_child_remove(t->fib_entry_index, t->sibling_index); + fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR); + fib_node_deinit(&t->node); + + if (!is_ip6) { hash_unset 
(vxm->vxlan4_tunnel_by_key, key4.as_u64); - ip4_sw_interface_enable_disable(t->sw_if_index, 1); } else { hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6); clib_mem_free (t->key6); - ip6_sw_interface_enable_disable(t->sw_if_index, 1); } vec_free (t->rewrite); pool_put (vxm->tunnels, t); @@ -405,12 +502,6 @@ static uword unformat_decap_next (unformat_input_t * input, va_list * args) if (unformat (input, "l2")) *result = VXLAN_INPUT_NEXT_L2_INPUT; - else if (unformat (input, "drop")) - *result = VXLAN_INPUT_NEXT_DROP; - else if (unformat (input, "ip4")) - *result = VXLAN_INPUT_NEXT_IP4_INPUT; - else if (unformat (input, "ip6")) - *result = VXLAN_INPUT_NEXT_IP6_INPUT; else if (unformat (input, "%d", &tmp)) *result = tmp; else @@ -528,10 +619,9 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, { case 0: if (is_add) - vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, + vnet_get_main(), sw_if_index); break; - case VNET_API_ERROR_INVALID_DECAP_NEXT: - return clib_error_return (0, "invalid decap-next..."); case VNET_API_ERROR_TUNNEL_EXIST: return clib_error_return (0, "tunnel already exists..."); @@ -563,7 +653,7 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, * * @cliexpar * Example of how to create a VXLAN Tunnel: - * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7 decap-next l2} + * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7} * Example of how to delete a VXLAN Tunnel: * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del} ?*/ @@ -572,7 +662,7 @@ VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = { .path = "create vxlan tunnel", .short_help = "create vxlan tunnel src dst vni " - " [encap-vrf-id ] [decap-next [l2|ip4|ip6]] [del]", + " [encap-vrf-id ]", .function = vxlan_add_del_tunnel_command_fn, }; /* *INDENT-ON* */ @@ -602,7 +692,7 @@ show_vxlan_tunnel_command_fn (vlib_main_t 
* vm, * @cliexpar * Example of how to display the VXLAN Tunnel entries: * @cliexstart{show vxlan tunnel} - * [0] 10.0.3.1 (src) 10.0.3.3 (dst) vni 13 encap_fib_index 1 decap_next l2 + * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 * @cliexend ?*/ /* *INDENT-OFF* */ @@ -630,6 +720,9 @@ clib_error_t *vxlan_init (vlib_main_t *vm) vxlan4_input_node.index, /* is_ip4 */ 1); udp_register_dst_port (vm, UDP_DST_PORT_vxlan6, vxlan6_input_node.index, /* is_ip4 */ 0); + + fib_node_register_type(FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft); + return 0; } diff --git a/vnet/vnet/vxlan/vxlan.h b/vnet/vnet/vxlan/vxlan.h index 703741af60b..e37f09aac99 100644 --- a/vnet/vnet/vxlan/vxlan.h +++ b/vnet/vnet/vxlan/vxlan.h @@ -27,6 +27,7 @@ #include #include #include +#include typedef CLIB_PACKED (struct { ip4_header_t ip4; /* 20 bytes */ @@ -67,40 +68,49 @@ typedef struct { /* Rewrite string. $$$$ embed vnet_rewrite header */ u8 * rewrite; - /* tunnel src and dst addresses */ - ip46_address_t src; - ip46_address_t dst; + /* FIB DPO for IP forwarding of VXLAN encap packet */ + dpo_id_t next_dpo; + + /* storage for the hash key */ + union { + vxlan4_tunnel_key_t *key4; /* unused for now */ + vxlan6_tunnel_key_t *key6; + }; /* vxlan VNI in HOST byte order */ u32 vni; - /* decap next index */ - u32 decap_next_index; + /* tunnel src and dst addresses */ + ip46_address_t src; + ip46_address_t dst; - /* L3 FIB index and L2 BD ID */ - u16 encap_fib_index; /* tunnel partner IP lookup here */ + /* The FIB index for src/dst addresses */ + u32 encap_fib_index; - /* vnet intfc hw/sw_if_index */ - u16 hw_if_index; + /* vnet intfc index */ u32 sw_if_index; + u32 hw_if_index; - union { /* storage for the hash key */ - vxlan4_tunnel_key_t *key4; - vxlan6_tunnel_key_t *key6; - }; + /** + * Linkage into the FIB object graph + */ + fib_node_t node; - /* flags */ - u32 flags; -} vxlan_tunnel_t; + /* The FIB entry sourced by the tunnel for its destination prefix */ + fib_node_index_t 
fib_entry_index; -/* Flags for vxlan_tunnel_t.flags */ -#define VXLAN_TUNNEL_IS_IPV4 1 + /** + * The tunnel is a child of the FIB entry for its destination. This is + * so it receives updates when the forwarding information for that entry + * changes. + * The tunnel's sibling index on the FIB entry's dependency list. + */ + u32 sibling_index; +} vxlan_tunnel_t; #define foreach_vxlan_input_next \ _(DROP, "error-drop") \ -_(L2_INPUT, "l2-input") \ -_(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") +_(L2_INPUT, "l2-input") typedef enum { #define _(s,n) VXLAN_INPUT_NEXT_##s, @@ -139,7 +149,8 @@ vxlan_main_t vxlan_main; extern vlib_node_registration_t vxlan4_input_node; extern vlib_node_registration_t vxlan6_input_node; -extern vlib_node_registration_t vxlan_encap_node; +extern vlib_node_registration_t vxlan4_encap_node; +extern vlib_node_registration_t vxlan6_encap_node; u8 * format_vxlan_encap_trace (u8 * s, va_list * args); diff --git a/vnet/vnet/vxlan/vxlan_error.def b/vnet/vnet/vxlan/vxlan_error.def index 3ead986cca8..17f905950f5 100644 --- a/vnet/vnet/vxlan/vxlan_error.def +++ b/vnet/vnet/vxlan/vxlan_error.def @@ -14,3 +14,4 @@ */ vxlan_error (DECAPSULATED, "good packets decapsulated") vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets") +vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header") diff --git a/vnet/vnet/vxlan/vxlan_packet.h b/vnet/vnet/vxlan/vxlan_packet.h index 8a9a3b80532..5f93a36fd8f 100644 --- a/vnet/vnet/vxlan/vxlan_packet.h +++ b/vnet/vnet/vxlan/vxlan_packet.h @@ -42,12 +42,14 @@ */ typedef struct { - u32 flags; + u8 flags; + u8 res1; + u8 res2; + u8 res3; u32 vni_reserved; } vxlan_header_t; -#define VXLAN_FLAGS_VALID_HOST_BYTE_ORDER (1<<27) -#define VXLAN_FLAGS_VALID_NET_BYTE_ORDER (clib_host_to_net_u32(1<<27)) +#define VXLAN_FLAGS_I 0x08 static inline u32 vnet_get_vni (vxlan_header_t * h) { @@ -60,7 +62,8 @@ static inline u32 vnet_get_vni (vxlan_header_t * h) static inline void vnet_set_vni_and_flags (vxlan_header_t * h, 
u32 vni) { h->vni_reserved = clib_host_to_net_u32 (vni<<8); - h->flags = VXLAN_FLAGS_VALID_NET_BYTE_ORDER; + * (u32 *) h = 0; + h->flags = VXLAN_FLAGS_I; } #endif /* __included_vxlan_packet_h__ */ -- cgit 1.2.3-korg