From 3ef822e1c3eedef8dd3cd95a000a012667641f37 Mon Sep 17 00:00:00 2001 From: John Lo Date: Tue, 7 Jun 2016 09:14:07 -0400 Subject: VPP-110: vxlan encap node should never touch a deleted tunnel Remove usage of dummy encap string for output from BD to a tunnel which has been deleted. Instead, use a node l2-output-del-tunnel so that if there are stale entries in the L2FIB for any deleted tunnel sw_if_index, l2-output will send packets using this entry to the l2-output-del-tunnel node, which just sets up the proper drop reason before sending packets to the error-drop node to drop the packet. Change-Id: I590982ee25e924ab74e2855c55c58baf29a9fad4 Signed-off-by: John Lo --- vnet/vnet/l2/l2_output.c | 124 +++++++++++++++++++++++++++++++++++++++-------- vnet/vnet/l2/l2_output.h | 20 ++++++++ vnet/vnet/vxlan/encap.c | 29 +---------- vnet/vnet/vxlan/vxlan.c | 45 ++++++----------- vnet/vnet/vxlan/vxlan.h | 8 +-- 5 files changed, 141 insertions(+), 85 deletions(-) diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c index f5b22117dae..acca349475c 100644 --- a/vnet/vnet/l2/l2_output.c +++ b/vnet/vnet/l2/l2_output.c @@ -62,31 +62,12 @@ static u8 * format_l2output_trace (u8 * s, va_list * args) } -#define foreach_l2output_error \ -_(L2OUTPUT, "L2 output packets") \ -_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \ -_(VTR_DROP, "L2 output tag rewrite drops") \ -_(SHG_DROP, "L2 split horizon drops") \ -_(DROP, "L2 output drops") - -typedef enum { -#define _(sym,str) L2OUTPUT_ERROR_##sym, - foreach_l2output_error -#undef _ - L2OUTPUT_N_ERROR, -} l2output_error_t; - static char * l2output_error_strings[] = { #define _(sym,string) string, foreach_l2output_error #undef _ }; -typedef enum { - L2OUTPUT_NEXT_DROP, - L2OUTPUT_N_NEXT, -} l2output_next_t; - // Return 0 if split horizon check passes, otherwise return non-zero // Packets should not be transmitted out an interface with the same // split-horizon group as the input interface, except if the shg is 0 @@ -411,9 +392,114 @@ 
VLIB_REGISTER_NODE (l2output_node,static) = { /* edit / add dispositions here */ .next_nodes = { [L2OUTPUT_NEXT_DROP] = "error-drop", + [L2OUTPUT_NEXT_DEL_TUNNEL] = "l2-output-del-tunnel", + }, +}; + + +#define foreach_l2output_del_tunnel_error \ +_(DROP, "L2 output to deleted tunnel") + +static char * l2output_del_tunnel_error_strings[] = { +#define _(sym,string) string, + foreach_l2output_del_tunnel_error +#undef _ +}; + +typedef enum { +#define _(sym,str) L2OUTPUT_DEL_TUNNEL_ERROR_##sym, + foreach_l2output_del_tunnel_error +#undef _ + L2OUTPUT_DEL_TUNNEL_N_ERROR, +} l2output_del_tunnel_error_t; + + +// Output node for tunnels which were in L2 BDs but have been deleted. +// On deletion of any tunnel which was on an L2 BD, its entry in +// l2_output_main table next_nodes.output_node_index_vec[sw_if_index] +// MUST be set to the value of L2OUTPUT_NEXT_DEL_TUNNEL. Thus, if there +// are stale entries in the L2FIB for this tunnel sw_if_index, l2-output +// will send packets for this sw_if_index to the l2-output-del-tunnel +// node, which just sets up the proper drop reason before sending packets +// to the error-drop node to drop the packet. Then, stale L2FIB entries +// for deleted tunnels won't cause possible packet or memory corruption. 
+static vlib_node_registration_t l2output_del_tunnel_node; + +static uword +l2output_del_tunnel_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + l2output_next_t next_index = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; // number of packets to process + + while (n_left_from > 0) + { + u32 n_left_to_next; + + // get space to enqueue frame to graph node "next_index" + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b0->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP]; + b1->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP]; + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + b0->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP]; + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (l2output_del_tunnel_node,static) = { + .function = l2output_del_tunnel_node_fn, + .name = "l2-output-del-tunnel", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2output_del_tunnel_error_strings), + .error_strings = l2output_del_tunnel_error_strings, + + .n_next_nodes = 1, + + /* edit / add dispositions here */ + .next_nodes = { + [0] = "error-drop", }, }; + VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn) clib_error_t *l2output_init (vlib_main_t *vm) diff --git a/vnet/vnet/l2/l2_output.h b/vnet/vnet/l2/l2_output.h index 
1c7b033cb44..b525a745ba7 100644 --- a/vnet/vnet/l2/l2_output.h +++ b/vnet/vnet/l2/l2_output.h @@ -104,6 +104,26 @@ typedef enum { #undef _ } l2output_feat_masks_t; +#define foreach_l2output_error \ +_(L2OUTPUT, "L2 output packets") \ +_(EFP_DROP, "L2 EFP filter pre-rewrite drops") \ +_(VTR_DROP, "L2 output tag rewrite drops") \ +_(SHG_DROP, "L2 split horizon drops") \ +_(DROP, "L2 output drops") + +typedef enum { + L2OUTPUT_NEXT_DROP, + L2OUTPUT_NEXT_DEL_TUNNEL, + L2OUTPUT_N_NEXT, +} l2output_next_t; + +typedef enum { +#define _(sym,str) L2OUTPUT_ERROR_##sym, + foreach_l2output_error +#undef _ + L2OUTPUT_N_ERROR, +} l2output_error_t; + // Return an array of strings containing graph node names of each feature char **l2output_get_feat_names(void); diff --git a/vnet/vnet/vxlan/encap.c b/vnet/vnet/vxlan/encap.c index 387a728a9c3..e7d49b027ce 100644 --- a/vnet/vnet/vxlan/encap.c +++ b/vnet/vnet/vxlan/encap.c @@ -21,8 +21,7 @@ /* Statistics (not all errors) */ #define foreach_vxlan_encap_error \ -_(ENCAPSULATED, "good packets encapsulated") \ -_(DEL_TUNNEL, "deleted tunnel packets") +_(ENCAPSULATED, "good packets encapsulated") static char * vxlan_encap_error_strings[] = { #define _(sym,string) string, @@ -160,22 +159,6 @@ vxlan_encap (vlib_main_t * vm, if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; if (PREDICT_FALSE(!is_ip4_1)) next1 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; - /* Check rewrite string and drop packet if tunnel is deleted */ - if (PREDICT_FALSE(t0->rewrite == vxlan4_dummy_rewrite || - t0->rewrite == vxlan6_dummy_rewrite)) - { - next0 = VXLAN_ENCAP_NEXT_DROP; - b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; - pkts_encapsulated --; - } /* Still go through normal encap with dummy rewrite */ - if (PREDICT_FALSE(t1->rewrite == vxlan4_dummy_rewrite || - t1->rewrite == vxlan6_dummy_rewrite)) - { - next1 = VXLAN_ENCAP_NEXT_DROP; - b1->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; - pkts_encapsulated --; - } /* Still go through normal encap 
with dummy rewrite */ - /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */ /* IP6 VXLAN header sizeof(ip6_vxlan_header_t) should be 56 octects */ if (PREDICT_TRUE(is_ip4_0)) @@ -418,16 +401,6 @@ vxlan_encap (vlib_main_t * vm, if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP; - /* Check rewrite string and drop packet if tunnel is deleted */ - if (PREDICT_FALSE(t0->rewrite == vxlan4_dummy_rewrite || - t0->rewrite == vxlan6_dummy_rewrite)) - { - next0 = VXLAN_ENCAP_NEXT_DROP; - b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL]; - pkts_encapsulated --; - } /* Still go through normal encap with dummy rewrite */ - - /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octets */ /* IP6 VXLAN header sizeof(ip4_vxlan_header_t) should be 56 octets */ if (PREDICT_TRUE(is_ip4_0)) diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c index da893d5ddbb..133fc6f450c 100644 --- a/vnet/vnet/vxlan/vxlan.c +++ b/vnet/vnet/vxlan/vxlan.c @@ -207,6 +207,7 @@ int vnet_vxlan_add_del_tunnel int rv; vxlan4_tunnel_key_t key4; vxlan6_tunnel_key_t key6; + l2output_main_t * l2om = &l2output_main; if (!a->is_ip6) { key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */ @@ -318,6 +319,15 @@ int vnet_vxlan_add_del_tunnel l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP; l2im->configs[sw_if_index].bd_index = 0; } + + /* + * Directs the l2 output path to work out the interface + * output next-arc itself. Needed when recycling a tunnel. 
+ */ + vec_validate_init_empty(l2om->next_nodes.output_node_index_vec, + sw_if_index, ~0); + l2om->next_nodes.output_node_index_vec[t->sw_if_index] + = ~0; vnet_sw_interface_set_flags (vnm, sw_if_index, VNET_SW_INTERFACE_FLAG_ADMIN_UP); if (!a->is_ip6) { @@ -343,24 +353,16 @@ int vnet_vxlan_add_del_tunnel vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0; + /* Directs the l2 path to turf packets sent to this sw_if_index */ + l2om->next_nodes.output_node_index_vec[t->sw_if_index] + = L2OUTPUT_NEXT_DEL_TUNNEL; + if (!a->is_ip6) hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64); else hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6); vec_free (t->rewrite); - if (!a->is_ip6) - { - t->rewrite = vxlan4_dummy_rewrite; - t->key4 = 0; - } - else - { - t->rewrite = vxlan6_dummy_rewrite; - clib_mem_free (t->key6); - t->key6 = 0; - } - pool_put (vxm->tunnels, t); } @@ -579,10 +581,6 @@ VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = { clib_error_t *vxlan_init (vlib_main_t *vm) { vxlan_main_t * vxm = &vxlan_main; - ip4_vxlan_header_t * hdr4; - ip4_header_t * ip4; - ip6_vxlan_header_t * hdr6; - ip6_header_t * ip6; vxm->vnet_main = vnet_get_main(); vxm->vlib_main = vm; @@ -592,21 +590,6 @@ clib_error_t *vxlan_init (vlib_main_t *vm) sizeof(vxlan6_tunnel_key_t), sizeof(uword)); - /* init dummy rewrite string for deleted vxlan tunnels */ - _vec_len(vxlan4_dummy_rewrite) = sizeof(ip4_vxlan_header_t); - hdr4 = (ip4_vxlan_header_t *) vxlan4_dummy_rewrite; - ip4 = &hdr4->ip4; - /* minimal rewrite setup, see vxlan_rewite() above as reference */ - ip4->ip_version_and_header_length = 0x45; - ip4->checksum = ip4_header_checksum (ip4); - - /* Same again for IPv6 */ - _vec_len(vxlan6_dummy_rewrite) = sizeof(ip6_vxlan_header_t); - hdr6 = (ip6_vxlan_header_t *) vxlan6_dummy_rewrite; - ip6 = &hdr6->ip6; - /* minimal rewrite setup, see vxlan_rewite() above as reference */ - ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28); - udp_register_dst_port (vm, 
UDP_DST_PORT_vxlan, vxlan4_input_node.index, /* is_ip4 */ 1); udp_register_dst_port (vm, UDP_DST_PORT_vxlan6, diff --git a/vnet/vnet/vxlan/vxlan.h b/vnet/vnet/vxlan/vxlan.h index 1c70c75793a..703741af60b 100644 --- a/vnet/vnet/vxlan/vxlan.h +++ b/vnet/vnet/vxlan/vxlan.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -126,13 +127,6 @@ typedef struct { /* Free vlib hw_if_indices */ u32 * free_vxlan_tunnel_hw_if_indices; - /* Dummy rewrite for deleted vxlan_tunnels with hw_if_indices as above */ - u64 dummy4_str [sizeof(ip4_vxlan_header_t)/sizeof(u64) + 2]; -#define vxlan4_dummy_rewrite ((u8 *) &vxlan_main.dummy4_str[1]) - - u64 dummy6_str [sizeof(ip6_vxlan_header_t)/sizeof(u64) + 2]; -#define vxlan6_dummy_rewrite ((u8 *) &vxlan_main.dummy6_str[1]) - /* Mapping from sw_if_index to tunnel index */ u32 * tunnel_index_by_sw_if_index; -- cgit 1.2.3-korg