diff options
Diffstat (limited to 'src/vnet/ip')
73 files changed, 4081 insertions, 2675 deletions
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c index 5f9ffa3b2b7..fa4a0e12276 100644 --- a/src/vnet/ip/icmp4.c +++ b/src/vnet/ip/icmp4.c @@ -41,12 +41,10 @@ #include <vnet/ip/ip.h> #include <vnet/pg/pg.h> #include <vnet/ip/ip_sas.h> +#include <vnet/util/throttle.h> -static char *icmp_error_strings[] = { -#define _(f,s) s, - foreach_icmp4_error -#undef _ -}; +/** ICMP throttling */ +static throttle_t icmp_throttle; static u8 * format_ip4_icmp_type_and_code (u8 * s, va_list * args) @@ -206,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_icmp_input_node) = { .function = ip4_icmp_input, .name = "ip4-icmp-input", @@ -215,15 +212,14 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = { .format_trace = format_icmp_input_trace, - .n_errors = ARRAY_LEN (icmp_error_strings), - .error_strings = icmp_error_strings, + .n_errors = ICMP4_N_ERROR, + .error_counters = icmp4_error_counters, .n_next_nodes = 1, .next_nodes = { [ICMP_INPUT_NEXT_ERROR] = "ip4-punt", }, }; -/* *INDENT-ON* */ typedef enum { @@ -255,11 +251,14 @@ ip4_icmp_error (vlib_main_t * vm, u32 *from, *to_next; uword n_left_from, n_left_to_next; ip4_icmp_error_next_t next_index; + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; + u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm)); + if (node->flags & VLIB_NODE_FLAG_TRACE) vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, /* stride */ 1, @@ -289,6 +288,21 @@ ip4_icmp_error (vlib_main_t * vm, ip_csum_t sum; org_p0 = vlib_get_buffer (vm, org_pi0); + ip0 = vlib_buffer_get_current (org_p0); + + /* Rate limit based on the src,dst addresses in the original packet + */ + u64 r0 = + (u64) ip0->dst_address.as_u32 << 32 | ip0->src_address.as_u32; + + if (throttle_check (&icmp_throttle, thread_index, r0, seed)) + { + vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 
1); + from += 1; + n_left_from -= 1; + continue; + } + p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0); if (!p0 || pi0 == ~0) /* Out of buffers */ continue; @@ -300,14 +314,16 @@ ip4_icmp_error (vlib_main_t * vm, n_left_from -= 1; n_left_to_next -= 1; - ip0 = vlib_buffer_get_current (p0); sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + vlib_buffer_copy_trace_flag (vm, org_p0, pi0); + /* Add IP header and ICMPv4 header including a 4 byte data field */ vlib_buffer_advance (p0, -sizeof (ip4_header_t) - sizeof (icmp46_header_t) - 4); + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; p0->current_length = p0->current_length > 576 ? 576 : p0->current_length; out_ip0 = vlib_buffer_get_current (p0); @@ -325,7 +341,7 @@ ip4_icmp_error (vlib_main_t * vm, /* Prefer a source address from "offending interface" */ if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address, &out_ip0->src_address)) - { /* interface has no IP6 address - should not happen */ + { /* interface has no IP4 address - should not happen */ next0 = IP4_ICMP_ERROR_NEXT_DROP; error0 = ICMP4_ERROR_DROP; } @@ -370,14 +386,13 @@ ip4_icmp_error (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_icmp_error_node) = { .function = ip4_icmp_error, .name = "ip4-icmp-error", .vector_size = sizeof (u32), - .n_errors = ARRAY_LEN (icmp_error_strings), - .error_strings = icmp_error_strings, + .n_errors = ICMP4_N_ERROR, + .error_counters = icmp4_error_counters, .n_next_nodes = IP4_ICMP_ERROR_N_NEXT, .next_nodes = { @@ -387,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = { .format_trace = format_icmp_input_trace, }; -/* *INDENT-ON* */ static uword @@ -570,6 +584,11 @@ icmp4_init (vlib_main_t * vm) ICMP_INPUT_NEXT_ERROR, sizeof (cm->ip4_input_next_index_by_type)); + vlib_thread_main_t *tm = &vlib_thread_main; + u32 n_vlib_mains = tm->n_vlib_mains; + + throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5); + return 0; } diff --git a/src/vnet/ip/icmp4.h 
b/src/vnet/ip/icmp4.h index e2a95673fc7..22a4fc508e5 100644 --- a/src/vnet/ip/icmp4.h +++ b/src/vnet/ip/icmp4.h @@ -15,29 +15,6 @@ #ifndef included_vnet_icmp4_h #define included_vnet_icmp4_h -#define foreach_icmp4_error \ - _ (NONE, "valid packets") \ - _ (UNKNOWN_TYPE, "unknown type") \ - _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \ - _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \ - _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \ - _ (OPTIONS_WITH_ODD_LENGTH, \ - "total option length not multiple of 8 bytes") \ - _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \ - _ (ECHO_REPLIES_SENT, "echo replies sent") \ - _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ - _ (DEST_UNREACH_SENT, "destination unreachable response sent") \ - _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \ - _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \ - _ (DROP, "error message dropped") - -typedef enum -{ -#define _(f,s) ICMP4_ERROR_##f, - foreach_icmp4_error -#undef _ -} icmp4_error_t; - typedef struct { u8 packet_data[64]; diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h index 0545046fe60..08e73f6cd7d 100644 --- a/src/vnet/ip/icmp46_packet.h +++ b/src/vnet/ip/icmp46_packet.h @@ -187,7 +187,6 @@ typedef enum #undef _ } icmp6_code_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 type; @@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct /* IP checksum of icmp header plus data which follows. */ u16 checksum; }) icmp46_header_t; -/* *INDENT-ON* */ /* ip6 neighbor discovery */ #define foreach_icmp6_neighbor_discovery_option \ @@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type #undef _ } icmp6_neighbor_discovery_option_type_t; -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* Option type. 
*/ @@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option; }) icmp6_neighbor_solicitation_header_t; -/* *INDENT-ON* */ #endif /* included_vnet_icmp46_packet_h */ diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index b6ed3ea0ec9..b095f679cc8 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -41,6 +41,10 @@ #include <vnet/ip/ip.h> #include <vnet/pg/pg.h> #include <vnet/ip/ip_sas.h> +#include <vnet/util/throttle.h> + +/** ICMP throttling */ +static throttle_t icmp_throttle; static u8 * format_ip6_icmp_type_and_code (u8 * s, va_list * args) @@ -123,12 +127,6 @@ format_icmp6_input_trace (u8 * s, va_list * va) return s; } -static char *icmp_error_strings[] = { -#define _(f,s) s, - foreach_icmp6_error -#undef _ -}; - typedef enum { ICMP_INPUT_NEXT_PUNT, @@ -237,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_input_node) = { .function = ip6_icmp_input, .name = "ip6-icmp-input", @@ -246,195 +243,14 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = { .format_trace = format_icmp6_input_trace, - .n_errors = ARRAY_LEN (icmp_error_strings), - .error_strings = icmp_error_strings, + .n_errors = ICMP6_N_ERROR, + .error_counters = icmp6_error_counters, .n_next_nodes = 1, .next_nodes = { [ICMP_INPUT_NEXT_PUNT] = "ip6-punt", }, }; -/* *INDENT-ON* */ - -typedef enum -{ - ICMP6_ECHO_REQUEST_NEXT_LOOKUP, - ICMP6_ECHO_REQUEST_NEXT_OUTPUT, - ICMP6_ECHO_REQUEST_N_NEXT, -} icmp6_echo_request_next_t; - -static uword -ip6_icmp_echo_request (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 *from, *to_next; - u32 n_left_from, n_left_to_next, next_index; - ip6_main_t *im = &ip6_main; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, 
- /* stride */ 1, - sizeof (icmp6_input_trace_t)); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 2 && n_left_to_next > 2) - { - vlib_buffer_t *p0, *p1; - ip6_header_t *ip0, *ip1; - icmp46_header_t *icmp0, *icmp1; - ip6_address_t tmp0, tmp1; - ip_csum_t sum0, sum1; - u32 bi0, bi1; - u32 fib_index0, fib_index1; - u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; - u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; - - bi0 = to_next[0] = from[0]; - bi1 = to_next[1] = from[1]; - - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, bi0); - p1 = vlib_get_buffer (vm, bi1); - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - icmp0 = ip6_next_header (ip0); - icmp1 = ip6_next_header (ip1); - - /* Check icmp type to echo reply and update icmp checksum. */ - sum0 = icmp0->checksum; - sum1 = icmp1->checksum; - - ASSERT (icmp0->type == ICMP6_echo_request); - ASSERT (icmp1->type == ICMP6_echo_request); - sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, - icmp46_header_t, type); - sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply, - icmp46_header_t, type); - - icmp0->checksum = ip_csum_fold (sum0); - icmp1->checksum = ip_csum_fold (sum1); - - icmp0->type = ICMP6_echo_reply; - icmp1->type = ICMP6_echo_reply; - - /* Swap source and destination address. */ - tmp0 = ip0->src_address; - tmp1 = ip1->src_address; - - ip0->src_address = ip0->dst_address; - ip1->src_address = ip1->dst_address; - - ip0->dst_address = tmp0; - ip1->dst_address = tmp1; - - /* New hop count. */ - ip0->hop_limit = im->host_config.ttl; - ip1->hop_limit = im->host_config.ttl; - - /* Determine the correct lookup fib indices... */ - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; - /* Determine the correct lookup fib indices... 
*/ - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1; - - /* verify speculative enqueues, maybe switch current next frame */ - /* if next0==next1==next_index then nothing special needs to be done */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip6_header_t *ip0; - icmp46_header_t *icmp0; - u32 bi0; - ip6_address_t tmp0; - ip_csum_t sum0; - u32 fib_index0; - u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; - - bi0 = to_next[0] = from[0]; - - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (p0); - icmp0 = ip6_next_header (ip0); - - /* Check icmp type to echo reply and update icmp checksum. */ - sum0 = icmp0->checksum; - - ASSERT (icmp0->type == ICMP6_echo_request); - sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, - icmp46_header_t, type); - - icmp0->checksum = ip_csum_fold (sum0); - - icmp0->type = ICMP6_echo_reply; - - /* Swap source and destination address. 
*/ - tmp0 = ip0->src_address; - ip0->src_address = ip0->dst_address; - ip0->dst_address = tmp0; - - ip0->hop_limit = im->host_config.ttl; - - /* if the packet is link local, we'll bounce through the link-local - * table with the RX interface correctly set */ - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; - - /* Verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_error_count (vm, ip6_icmp_input_node.index, - ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors); - - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = { - .function = ip6_icmp_echo_request, - .name = "ip6-icmp-echo-request", - - .vector_size = sizeof (u32), - - .format_trace = format_icmp6_input_trace, - - .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT, - .next_nodes = { - [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup", - [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output", - }, -}; -/* *INDENT-ON* */ typedef enum { @@ -476,11 +292,14 @@ ip6_icmp_error (vlib_main_t * vm, u32 *from, *to_next; uword n_left_from, n_left_to_next; ip6_icmp_error_next_t next_index; + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; + u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm)); + if (node->flags & VLIB_NODE_FLAG_TRACE) vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, /* stride */ 1, @@ -510,6 +329,21 @@ ip6_icmp_error (vlib_main_t * vm, int bogus_length; org_p0 = vlib_get_buffer (vm, org_pi0); + ip0 = vlib_buffer_get_current (org_p0); + + /* Rate limit based on the src,dst addresses in the original packet + */ + u64 r0 = 
(ip6_address_hash_to_u64 (&ip0->dst_address) ^ + ip6_address_hash_to_u64 (&ip0->src_address)); + + if (throttle_check (&icmp_throttle, thread_index, r0, seed)) + { + vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1); + from += 1; + n_left_from -= 1; + continue; + } + p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0); if (!p0 || pi0 == ~0) /* Out of buffers */ continue; @@ -521,15 +355,15 @@ ip6_icmp_error (vlib_main_t * vm, n_left_from -= 1; n_left_to_next -= 1; - ip0 = vlib_buffer_get_current (p0); sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + vlib_buffer_copy_trace_flag (vm, org_p0, pi0); + /* Add IP header and ICMPv6 header including a 4 byte data field */ vlib_buffer_advance (p0, -(sizeof (ip6_header_t) + sizeof (icmp46_header_t) + 4)); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; p0->current_length = p0->current_length > 1280 ? 1280 : p0->current_length; @@ -590,14 +424,13 @@ ip6_icmp_error (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_icmp_error_node) = { .function = ip6_icmp_error, .name = "ip6-icmp-error", .vector_size = sizeof (u32), - .n_errors = ARRAY_LEN (icmp_error_strings), - .error_strings = icmp_error_strings, + .n_errors = ICMP6_N_ERROR, + .error_counters = icmp6_error_counters, .n_next_nodes = IP6_ICMP_ERROR_N_NEXT, .next_nodes = { @@ -607,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = { .format_trace = format_icmp6_input_trace, }; -/* *INDENT-ON* */ static uword @@ -804,8 +636,10 @@ icmp6_init (vlib_main_t * vm) cm->min_valid_length_by_type[ICMP6_redirect] = sizeof (icmp6_redirect_header_t); - icmp6_register_type (vm, ICMP6_echo_request, - ip6_icmp_echo_request_node.index); + vlib_thread_main_t *tm = &vlib_thread_main; + u32 n_vlib_mains = tm->n_vlib_mains; + + throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3); + return (NULL); } diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h index 
7a5eef5df18..119aaf0bae9 100644 --- a/src/vnet/ip/icmp6.h +++ b/src/vnet/ip/icmp6.h @@ -17,48 +17,6 @@ #include <vnet/ip/icmp46_packet.h> -#define foreach_icmp6_error \ - _ (NONE, "valid packets") \ - _ (UNKNOWN_TYPE, "unknown type") \ - _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \ - _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \ - _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \ - _ (OPTIONS_WITH_ODD_LENGTH, \ - "total option length not multiple of 8 bytes") \ - _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \ - _ (ECHO_REPLIES_SENT, "echo replies sent") \ - _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \ - "neighbor solicitations from source not on link") \ - _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \ - "neighbor solicitations for unknown targets") \ - _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \ - _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \ - _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \ - "router solicitations from source not on link") \ - _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \ - "neighbor discovery unsupported interface") \ - _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \ - "neighbor discovery not configured") \ - _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \ - "router advertisement source not link local") \ - _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \ - _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \ - _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ - _ (DEST_UNREACH_SENT, "destination unreachable response sent") \ - _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \ - _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \ - _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \ - _ (DROP, "error message dropped") \ - _ (ALLOC_FAILURE, "buffer allocation failure") - - -typedef enum -{ -#define _(f,s) ICMP6_ERROR_##f, - foreach_icmp6_error -#undef _ -} icmp6_error_t; - typedef struct { u8 packet_data[64]; diff --git 
a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 28786fa5a90..967f56cf917 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -20,7 +20,7 @@ called through a shared memory interface. */ -option version = "3.1.0"; +option version = "3.2.0"; import "vnet/interface_types.api"; import "vnet/fib/fib_types.api"; @@ -57,6 +57,35 @@ autoreply define ip_table_add_del vl_api_ip_table_t table; }; +/** \brief Allocate an unused table + A table can be added multiple times. + If a large number of tables are in use (millions), this API might + fail to find a free ID with very low probability, and will return + EAGAIN. A subsequent attempt may be successful. + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table - if table.table_id == ~0, vpp allocates an unused table_id and + proceeds as in ip_table_add_del with is_add = true + if table.table_id != ~0, vpp uses the table.table_id and + proceeds as in ip_table_add_del with is_add = true + table.table_id should never be 0 +*/ +define ip_table_allocate +{ + u32 client_index; + u32 context; + + vl_api_ip_table_t table; +}; + +define ip_table_allocate_reply +{ + u32 context; + i32 retval; + + vl_api_ip_table_t table; +}; + /** \brief Dump IP all fib tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,6 +366,41 @@ autoreply define set_ip_flow_hash_v2 vl_api_ip_flow_hash_config_t flow_hash_config; }; +/** + @brief flow hash settings for an IP table + @param src - include src in flow hash + @param dst - include dst in flow hash + @param sport - include sport in flow hash + @param dport - include dport in flow hash + @param proto - include proto in flow hash + @param reverse - include reverse in flow hash + @param symmetric - include symmetry in flow hash + @param flowlabel - include flowlabel in flow hash + @param gtpv1teid - include gtpv1teid in flow hash +*/ +enumflag 
ip_flow_hash_config_v2 +{ + IP_API_V2_FLOW_HASH_SRC_IP = 0x01, + IP_API_V2_FLOW_HASH_DST_IP = 0x02, + IP_API_V2_FLOW_HASH_SRC_PORT = 0x04, + IP_API_V2_FLOW_HASH_DST_PORT = 0x08, + IP_API_V2_FLOW_HASH_PROTO = 0x10, + IP_API_V2_FLOW_HASH_REVERSE = 0x20, + IP_API_V2_FLOW_HASH_SYMETRIC = 0x40, + IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80, + IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100, +}; + +autoreply define set_ip_flow_hash_v3 +{ + u32 client_index; + u32 context; + u32 table_id; + vl_api_address_family_t af; + vl_api_ip_flow_hash_config_v2_t flow_hash_config; + option status="in_progress"; +}; + /** \brief Set the ip flow hash router ID @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -558,6 +622,7 @@ typedef punt_redirect autoreply define ip_punt_redirect { option deprecated; + u32 client_index; u32 context; vl_api_punt_redirect_t punt; @@ -566,6 +631,8 @@ autoreply define ip_punt_redirect define ip_punt_redirect_dump { + option deprecated; + u32 client_index; u32 context; vl_api_interface_index_t sw_if_index; @@ -574,6 +641,8 @@ define ip_punt_redirect_dump define ip_punt_redirect_details { + option deprecated; + u32 context; vl_api_punt_redirect_t punt; }; @@ -807,6 +876,30 @@ autoreply define ip_reassembly_enable_disable vl_api_ip_reass_type_t type; }; +/** enable/disable full reassembly of packets aimed at our addresses */ +autoreply define ip_local_reass_enable_disable +{ + u32 client_index; + u32 context; + bool enable_ip4; + bool enable_ip6; +}; + +/** get status of local reassembly */ +define ip_local_reass_get +{ + u32 client_index; + u32 context; +}; + +define ip_local_reass_get_reply +{ + u32 context; + i32 retval; + bool ip4_is_enabled; + bool ip6_is_enabled; +}; + /** @brief Set a Path MTU value. i.e. a MTU value for a given neighbour. 
The neighbour can be described as attached (w/ interface and next-hop) @@ -864,6 +957,816 @@ autoreply define ip_path_mtu_replace_end u32 context; }; +counters ip_frag { + none { + severity info; + type counter64; + units "packets"; + description "packet fragmented"; + }; + small_packet { + severity error; + type counter64; + units "packets"; + description "packet smaller than MTU"; + }; + fragment_sent { + severity info; + type counter64; + units "packets"; + description "number of sent fragments"; + }; + cant_fragment_header { + severity error; + type counter64; + units "packets"; + description "can't fragment header"; + }; + dont_fragment_set { + severity error; + type counter64; + units "packets"; + description "can't fragment this packet"; + }; + malformed { + severity error; + type counter64; + units "packets"; + description "malformed packet"; + }; + memory { + severity error; + type counter64; + units "packets"; + description "could not allocate buffer"; + }; + unknown { + severity error; + type counter64; + units "packets"; + description "unknown error"; + }; +}; + +counters ip4 { + /* Must be first. 
*/ + none { + severity info; + type counter64; + units "packets"; + description "valid ip4 packets"; + }; + + /* Errors signalled by ip4-input */ + too_short { + severity error; + type counter64; + units "packets"; + description "ip4 length < 20 bytes"; + }; + bad_length { + severity error; + type counter64; + units "packets"; + description "ip4 length > l2 length"; + }; + bad_checksum { + severity error; + type counter64; + units "packets"; + description "bad ip4 checksum"; + }; + version { + severity error; + type counter64; + units "packets"; + description "ip4 version != 4"; + }; + options { + severity info; + type counter64; + units "packets"; + description "ip4 options present"; + }; + fragment_offset_one { + severity error; + type counter64; + units "packets"; + description "ip4 fragment offset == 1"; + }; + time_expired { + severity error; + type counter64; + units "packets"; + description "ip4 ttl <= 1"; + }; + hdr_too_short { + severity error; + type counter64; + units "packets"; + description "ip4 IHL < 5"; + }; + + /* Errors signalled by ip4-rewrite. */ + mtu_exceeded { + severity error; + type counter64; + units "packets"; + description "ip4 MTU exceeded and DF set"; + }; + dst_lookup_miss { + severity error; + type counter64; + units "packets"; + description "ip4 destination lookup miss"; + }; + src_lookup_miss { + severity error; + type counter64; + units "packets"; + description "ip4 source lookup miss"; + }; + drop { + severity error; + type counter64; + units "packets"; + description "ip4 drop"; + }; + punt { + severity error; + type counter64; + units "packets"; + description "ip4 punt"; + }; + same_interface { + severity error; + type counter64; + units "packets"; + description "ip4 egress interface same as ingress"; + }; + + /* errors signalled by ip4-local. 
*/ + unknown_protocol { + severity error; + type counter64; + units "packets"; + description "unknown ip protocol"; + }; + tcp_checksum { + severity error; + type counter64; + units "packets"; + description "bad tcp checksum"; + }; + udp_checksum { + severity error; + type counter64; + units "packets"; + description "bad udp checksum"; + }; + udp_length { + severity error; + type counter64; + units "packets"; + description "inconsistent udp/ip lengths"; + }; + + /* spoofed packets in ip4-rewrite-local */ + spoofed_local_packets { + severity error; + type counter64; + units "packets"; + description "ip4 spoofed local-address packet drops"; + }; + + /* Errors signalled by ip4-inacl */ + inacl_table_miss { + severity error; + type counter64; + units "packets"; + description "input ACL table-miss drops"; + }; + inacl_session_deny { + severity error; + type counter64; + units "packets"; + description "input ACL session deny drops"; + }; + + /* Errors singalled by ip4-outacl */ + outacl_table_miss { + severity error; + type counter64; + units "packets"; + description "output ACL table-miss drops"; + }; + outacl_session_deny { + severity error; + type counter64; + units "packets"; + description "output ACL session deny drops"; + }; + + /* Errors from mfib-forward */ + rpf_failure { + severity error; + type counter64; + units "packets"; + description "Multicast RPF check failed"; + }; + + /* Errors signalled by ip4-reassembly */ + reass_duplicate_fragment { + severity error; + type counter64; + units "packets"; + description "duplicate/overlapping fragments"; + }; + reass_limit_reached { + severity error; + type counter64; + units "packets"; + description "drops due to concurrent reassemblies limit"; + }; + reass_fragment_chain_too_long { + severity error; + type counter64; + units "packets"; + description "fragment chain too long (drop)"; + }; + reass_no_buf { + severity error; + type counter64; + units "packets"; + description "out of buffers (drop)"; + }; + 
reass_malformed_packet { + severity error; + type counter64; + units "packets"; + description "malformed packets"; + }; + reass_internal_error { + severity error; + type counter64; + units "packets"; + description "drops due to internal reassembly error"; + }; + reass_timeout { + severity error; + type counter64; + units "packets"; + description "fragments dropped due to reassembly timeout"; + }; + reass_to_custom_app { + severity error; + type counter64; + units "packets"; + description "send to custom drop app"; + }; + reass_success { + severity info; + type counter64; + units "packets"; + description "successful reassemblies"; + }; + reass_fragments_reassembled { + severity info; + type counter64; + units "packets"; + description "fragments reassembled"; + }; + reass_fragments_rcvd { + severity info; + type counter64; + units "packets"; + description "fragments received"; + }; + reass_unsupp_ip_prot { + severity error; + type counter64; + units "packets"; + description "unsupported ip protocol"; + }; +}; + +/** + * IPv6 Error/info counters + */ +counters ip6 { + /* Must be first. */ + none { + severity info; + type counter64; + units "packets"; + description "valid ip6 packets"; + }; + + /* Errors signalled by ip6-input */ + too_short { + severity error; + type counter64; + units "packets"; + description "ip6 length < 40 bytes"; + }; + bad_length { + severity error; + type counter64; + units "packets"; + description "ip6 length > l2 length"; + }; + version { + severity error; + type counter64; + units "packets"; + description "ip6 version != 6"; + }; + time_expired { + severity error; + type counter64; + units "packets"; + description "ip6 ttl <= 1"; + }; + + /* Errors signalled by ip6-rewrite. 
*/ + mtu_exceeded { + severity error; + type counter64; + units "packets"; + description "ip6 MTU exceeded"; + }; + dst_lookup_miss { + severity error; + type counter64; + units "packets"; + description "ip6 destination lookup miss"; + }; + src_lookup_miss { + severity error; + type counter64; + units "packets"; + description "ip6 source lookup miss"; + }; + drop { + severity error; + type counter64; + units "packets"; + description "ip6 drop"; + }; + punt { + severity error; + type counter64; + units "packets"; + description "ip6 punt"; + }; + + /* errors signalled by ip6-local. */ + unknown_protocol { + severity error; + type counter64; + units "packets"; + description "unknown ip protocol"; + }; + udp_checksum { + severity error; + type counter64; + units "packets"; + description "bad udp checksum"; + }; + icmp_checksum { + severity error; + type counter64; + units "packets"; + description "bad icmp checksum"; + }; + udp_length { + severity error; + type counter64; + units "packets"; + description "inconsistent udp/ip lengths"; + }; + /* Errors signalled by udp6-lookup. 
*/ + unknown_udp_port { + severity error; + type counter64; + units "packets"; + description "no listener for udp port"; + }; + + /* spoofed packets in ip6-rewrite-local */ + spoofed_local_packets { + severity error; + type counter64; + units "packets"; + description "ip6 spoofed local-address packet drops"; + }; + + /* Errors signalled by ip6-inacl */ + inacl_table_miss { + severity error; + type counter64; + units "packets"; + description "input ACL table-miss drops"; + }; + inacl_session_deny { + severity error; + type counter64; + units "packets"; + description "input ACL session deny drops"; + }; + + /* Errors singalled by ip6-outacl */ + outacl_table_miss { + severity error; + type counter64; + units "packets"; + description "output ACL table-miss drops"; + }; + outacl_session_deny { + severity error; + type counter64; + units "packets"; + description "output ACL session deny drops"; + }; + + /* Errors from mfib-forward */ + rpf_failure { + severity error; + type counter64; + units "packets"; + description "Multicast RPF check failed"; + }; + + /* Errors signalled by ip6-reassembly */ + reass_missing_upper { + severity error; + type counter64; + units "packets"; + description "missing-upper layer drops"; + }; + reass_duplicate_fragment { + severity error; + type counter64; + units "packets"; + description "duplicate fragments"; + }; + reass_overlapping_fragment { + severity error; + type counter64; + units "packets"; + description "overlapping fragments"; + }; + reass_limit_reached { + severity error; + type counter64; + units "packets"; + description "drops due to concurrent reassemblies limit"; + }; + reass_fragment_chain_too_long { + severity error; + type counter64; + units "packets"; + description "fragment chain too long (drop)"; + }; + reass_no_buf { + severity error; + type counter64; + units "packets"; + description "out of buffers (drop)"; + }; + reass_timeout { + severity error; + type counter64; + units "packets"; + description "fragments dropped 
due to reassembly timeout"; + }; + reass_internal_error { + severity error; + type counter64; + units "packets"; + description "drops due to internal reassembly error"; + }; + reass_invalid_frag_len { + severity error; + type counter64; + units "packets"; + description "invalid fragment length"; + }; + reass_to_custom_app { + severity error; + type counter64; + units "packets"; + description "send to custom drop app"; + }; + reass_no_frag_hdr { + severity error; + type counter64; + units "packets"; + description "no fragmentation header"; + }; + reass_invalid_frag_size { + severity error; + type counter64; + units "packets"; + description "drop due to invalid fragment size"; + }; + reass_success { + severity info; + type counter64; + units "packets"; + description "successful reassemblies"; + }; + reass_fragments_reassembled { + severity info; + type counter64; + units "packets"; + description "fragments reassembled"; + }; + reass_fragments_rcvd { + severity info; + type counter64; + units "packets"; + description "fragments received"; + }; + reass_unsupp_ip_proto { + severity error; + type counter64; + units "packets"; + description "unsupported ip protocol"; + }; +}; + +counters icmp4 { + none { + severity info; + type counter64; + units "packets"; + description "valid packets"; + }; + unknown_type { + severity error; + type counter64; + units "packets"; + description "unknown type"; + }; + invalid_code_for_type { + severity error; + type counter64; + units "packets"; + description "invalid code for type"; + }; + invalid_hop_limit_for_type { + severity error; + type counter64; + units "packets"; + description "hop_limit != 255"; + }; + length_too_small_for_type { + severity error; + type counter64; + units "packets"; + description "payload length too small for type"; + }; + options_with_odd_length { + severity error; + type counter64; + units "packets"; + description "total option length not multiple of 8 bytes"; + }; + option_with_zero_length { + severity error; 
+ type counter64; + units "packets"; + description "option has zero length"; + }; + echo_replies_sent { + severity info; + type counter64; + units "packets"; + description "echo replies sent"; + }; + dst_lookup_miss { + severity error; + type counter64; + units "packets"; + description "icmp6 dst address lookup misses"; + }; + dest_unreach_sent { + severity info; + type counter64; + units "packets"; + description "destination unreachable response sent"; + }; + ttl_expire_sent { + severity info; + type counter64; + units "packets"; + description "hop limit exceeded response sent"; + }; + param_problem_sent { + severity info; + type counter64; + units "packets"; + description "parameter problem response sent"; + }; + drop { + severity error; + type counter64; + units "packets"; + description "error message dropped"; + }; +}; + +counters icmp6 { + none { + severity info; + type counter64; + units "packets"; + description "valid packets"; + }; + unknown_type { + severity error; + type counter64; + units "packets"; + description "unknown type"; + }; + invalid_code_for_type { + severity error; + type counter64; + units "packets"; + description "invalid code for type"; + }; + invalid_hop_limit_for_type { + severity error; + type counter64; + units "packets"; + description "hop_limit != 255"; + }; + length_too_small_for_type { + severity error; + type counter64; + units "packets"; + description "payload length too small for type"; + }; + options_with_odd_length { + severity error; + type counter64; + units "packets"; + description "total option length not multiple of 8 bytes"; + }; + option_with_zero_length { + severity error; + type counter64; + units "packets"; + description "option has zero length"; + }; + echo_replies_sent { + severity info; + type counter64; + units "packets"; + description "echo replies sent"; + }; + neighbor_solicitation_source_not_on_link { + severity error; + type counter64; + units "packets"; + description "neighbor solicitations from source not 
on link"; + }; + neighbor_solicitation_source_unknown { + severity error; + type counter64; + units "packets"; + description "neighbor solicitations for unknown targets"; + }; + neighbor_advertisements_tx { + severity info; + type counter64; + units "packets"; + description "neighbor advertisements sent"; + }; + neighbor_advertisements_rx { + severity info; + type counter64; + units "packets"; + description "neighbor advertisements received"; + }; + router_solicitation_source_not_on_link { + severity error; + type counter64; + units "packets"; + description "router solicitations from source not on link"; + }; + router_solicitation_unsupported_intf { + severity error; + type counter64; + units "packets"; + description "neighbor discovery unsupported interface"; + }; + router_solicitation_radv_not_config { + severity error; + type counter64; + units "packets"; + description "neighbor discovery not configured"; + }; + router_advertisement_source_not_link_local { + severity error; + type counter64; + units "packets"; + description "router advertisement source not link local"; + }; + router_advertisements_tx { + severity info; + type counter64; + units "packets"; + description "router advertisements sent"; + }; + router_advertisements_rx { + severity info; + type counter64; + units "packets"; + description "router advertisements received"; + }; + dst_lookup_miss { + severity error; + type counter64; + units "packets"; + description "icmp6 dst address lookup misses"; + }; + dest_unreach_sent { + severity info; + type counter64; + units "packets"; + description "destination unreachable response sent"; + }; + packet_too_big_sent { + severity info; + type counter64; + units "packets"; + description "packet too big response sent"; + }; + ttl_expire_sent { + severity info; + type counter64; + units "packets"; + description "hop limit exceeded response sent"; + }; + param_problem_sent { + severity info; + type counter64; + units "packets"; + description "parameter problem 
response sent"; + }; + drop { + severity error; + type counter64; + units "packets"; + description "error message dropped"; + }; + alloc_failure { + severity error; + type counter64; + units "packets"; + description "buffer allocation failure"; + }; +}; + +paths { + "/err/ip-frag" "ip_frag"; + "/err/mpls-frag" "ip_frag"; + "/err/ip4-mpls-label-disposition-pipe" "ip4"; + "/err/ip4-mpls-label-disposition-uniform" "ip4"; + "/err/ip4-local" "ip4"; + "/err/ip4-input" "ip4"; + "/err/ip4-full-reassembly" "ip4"; + "/err/ip4-local-full-reassembly" "ip4"; + "/err/ip4-full-reassembly-feature" "ip4"; + "/err/ip4-full-reassembly-custom" "ip4"; + "/err/ip4-full-reassembly-expire-walk" "ip4"; + "/err/ip4-sv-reassembly" "ip4"; + "/err/ip4-sv-reassembly-feature" "ip4"; + "/err/ip4-sv-reassembly-output-feature" "ip4"; + "/err/ip4-sv-reassembly-custom-next" "ip4"; + "/err/ip4-sv-reassembly-expire-walk" "ip4"; + "/err/ip6-mpls-label-disposition-pipe" "ip6"; + "/err/ip6-mpls-label-disposition-uniform" "ip6"; + "/err/ip6-local" "ip6"; + "/err/ip6-input" "ip6"; + "/err/ip6-full-reassembly" "ip6"; + "/err/ip6-local-full-reassembly" "ip6"; + "/err/ip6-full-reassembly-feature" "ip6"; + "/err/ip6-full-reassembly-custom" "ip6"; + "/err/ip6-full-reassembly-expire-walk" "ip6"; + "/err/ip6-sv-reassembly" "ip6"; + "/err/ip6-sv-reassembly-feature" "ip6"; + "/err/ip6-sv-reassembly-output-feature" "ip6"; + "/err/ip6-sv-reassembly-custom-next" "ip6"; + "/err/ip6-sv-reassembly-expire-walk" "ip6"; + "/err/ip4-icmp-input" "icmp4"; + "/err/ip4-icmp-error" "icmp4"; + "/err/ip6-icmp-input" "icmp6"; + "/err/ip6-icmp-error" "icmp6"; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c index 5d0c7707dd3..586f7dfbc85 100644 --- a/src/vnet/ip/ip.c +++ b/src/vnet/ip/ip.c @@ -18,6 +18,20 @@ u32 ip_flow_hash_router_id; +ethernet_type_t +ip_address_family_to_ether_type (ip_address_family_t af) +{ + switch (af) + { + case AF_IP4: + return (ETHERNET_TYPE_IP4); + 
case AF_IP6: + return (ETHERNET_TYPE_IP6); + } + ASSERT (0); + return (ETHERNET_TYPE_IP4); +} + u8 ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4) { @@ -104,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4) sizeof (ip6_address_t)); } -/* *INDENT-OFF* */ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = { [IP_FEATURE_INPUT] = { [AF_IP4] = { @@ -157,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = { }, }, }; -/* *INDENT-ON* */ void ip_feature_enable_disable (ip_address_family_t af, @@ -189,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af, } int -ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config) +ip_flow_hash_set (ip_address_family_t af, u32 table_id, + flow_hash_config_t flow_hash_config) { fib_protocol_t fproto; u32 fib_index; diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 6d822d29dbe..9ebefa0cf5d 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -51,19 +51,18 @@ #include <vnet/ip/ip_packet.h> #include <vnet/ip/lookup.h> #include <vnet/ip/ip_interface.h> +#include <vnet/ip/ip.api_enum.h> #include <vnet/tcp/tcp_packet.h> #include <vnet/udp/udp_packet.h> #include <vnet/ip/icmp46_packet.h> #include <vnet/ip/ip4.h> -#include <vnet/ip/ip4_error.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/icmp4.h> #include <vnet/ip/ip6.h> #include <vnet/ip/ip6_packet.h> -#include <vnet/ip/ip6_error.h> #include <vnet/ip/icmp6.h> /* Per protocol info. 
*/ @@ -267,8 +266,11 @@ void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api, void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api); -int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, - u32 table_id, u8 is_api); +void fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index); +void mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index); +int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id); + +u32 ip_table_get_unused_id (fib_protocol_t fproto); u8 ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4); u8 ip_is_local_host (ip46_address_t * ip46_address, u8 is_ip4); @@ -286,6 +288,8 @@ void ip_feature_enable_disable (ip_address_family_t af, void *feature_config, u32 n_feature_config_bytes); +ethernet_type_t ip_address_family_to_ether_type (ip_address_family_t af); + always_inline u32 vlib_buffer_get_ip4_fib_index (vlib_buffer_t * b); always_inline u32 vlib_buffer_get_ip6_fib_index (vlib_buffer_t * b); always_inline u32 diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index dde7b7b9de9..45d07c2e0f6 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -169,7 +169,6 @@ typedef struct ip4_main_t /** Global ip4 main structure. */ extern ip4_main_t ip4_main; -extern char *ip4_error_strings[]; /** Global ip4 input node. Errors get attached to ip4 input node. */ extern vlib_node_registration_t ip4_input_node; @@ -212,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im, ip_interface_address_t *ia; ip4_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -223,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im, break; } })); - /* *INDENT-ON* */ if (result_ia) *result_ia = result ? 
ia : 0; return result; diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h index f726178ee63..90f766464f6 100644 --- a/src/vnet/ip/ip46_address.h +++ b/src/vnet/ip/ip46_address.h @@ -34,7 +34,6 @@ typedef enum extern u8 *format_ip46_type (u8 * s, va_list * args); -/* *INDENT-OFF* */ typedef CLIB_PACKED (union ip46_address_t_ { struct { u32 pad[3]; @@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ { u8 as_u8[16]; u64 as_u64[2]; }) ip46_address_t; -/* *INDENT-ON* */ format_function_t format_ip46_address; diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c index f58be898d9b..e3da27914bd 100644 --- a/src/vnet/ip/ip46_cli.c +++ b/src/vnet/ip/ip46_cli.c @@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2) return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_command, static) = { .path = "set interface ip", .short_help = "IP4/IP6 commands", }; -/* *INDENT-ON* */ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) @@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) ip_interface_address_t *ia; int i; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) vec_add1 (ip4_addrs, x[0]); vec_add1 (ip4_masks, ia->address_length); })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) vec_add1 (ip6_addrs, x[0]); vec_add1 (ip6_masks, ia->address_length); })); - /* *INDENT-ON* */ for (i = 0; i < vec_len (ip4_addrs); i++) ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i], @@ -212,13 +206,11 @@ done: * @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(set_interface_ip_address_command, static) = { .path = "set interface ip address", .function = add_del_ip_address, .short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]", }; -/* *INDENT-ON* */ static clib_error_t * set_reassembly_command_fn (vlib_main_t * vm, @@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm, return NULL; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_reassembly_command, static) = { .path = "set interface reassembly", .short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]", .function = set_reassembly_command_fn, }; -/* *INDENT-ON* */ /* Dummy init function to get us linked in. */ static clib_error_t * diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h deleted file mode 100644 index dce3dd4c1ab..00000000000 --- a/src/vnet/ip/ip4_error.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip/ip4_error.h: ip4 fast path errors - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_ip_ip4_error_h -#define included_ip_ip4_error_h - -#define foreach_ip4_error \ - /* Must be first. */ \ - _ (NONE, "valid ip4 packets") \ - \ - /* Errors signalled by ip4-input */ \ - _ (TOO_SHORT, "ip4 length < 20 bytes") \ - _ (BAD_LENGTH, "ip4 length > l2 length") \ - _ (BAD_CHECKSUM, "bad ip4 checksum") \ - _ (VERSION, "ip4 version != 4") \ - _ (OPTIONS, "ip4 options present") \ - _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \ - _ (TIME_EXPIRED, "ip4 ttl <= 1") \ - \ - /* Errors signalled by ip4-rewrite. 
*/ \ - _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \ - _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \ - _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \ - _ (DROP, "ip4 drop") \ - _ (PUNT, "ip4 punt") \ - _ (SAME_INTERFACE, "ip4 egress interface same as ingress") \ - \ - /* Errors signalled by ip4-local. */ \ - _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \ - _ (TCP_CHECKSUM, "bad tcp checksum") \ - _ (UDP_CHECKSUM, "bad udp checksum") \ - _ (UDP_LENGTH, "inconsistent udp/ip lengths") \ - \ - /* Spoofed packets in ip4-rewrite-local */ \ - _ (SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \ - \ - /* Errors signalled by ip4-inacl */ \ - _ (INACL_TABLE_MISS, "input ACL table-miss drops") \ - _ (INACL_SESSION_DENY, "input ACL session deny drops") \ - /* Errors singalled by ip4-outacl */ \ - _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \ - _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \ - \ - /* Errors from mfib-forward */ \ - _ (RPF_FAILURE, "Multicast RPF check failed") \ - \ - /* Errors signalled by ip4-reassembly */ \ - _ (REASS_DUPLICATE_FRAGMENT, "duplicate/overlapping fragments") \ - _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \ - _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \ - _ (REASS_NO_BUF, "out of buffers (drop)") \ - _ (REASS_MALFORMED_PACKET, "malformed packets") \ - _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \ - _ (REASS_UNSUPP_IP_PROT, "unsupported ip protocol") - -typedef enum -{ -#define _(sym,str) IP4_ERROR_##sym, - foreach_ip4_error -#undef _ - IP4_N_ERROR, -} ip4_error_t; - -#endif /* included_ip_ip4_error_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 5cd5e418fd6..ff74b52eb18 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -52,6 +52,7 @@ #include 
<vnet/mfib/ip4_mfib.h> #include <vnet/dpo/load_balance.h> #include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/receive_dpo.h> #include <vnet/dpo/classify_dpo.h> #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */ #include <vnet/adj/adj_dp.h> @@ -60,6 +61,7 @@ #include <vnet/ip/ip4_forward.h> #include <vnet/interface_output.h> #include <vnet/classify/vnet_classify.h> +#include <vnet/ip/reass/ip4_full_reass.h> /** @brief IPv4 lookup node. @node ip4-lookup @@ -101,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node, static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_lookup_node) = { .name = "ip4-lookup", @@ -110,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) = .n_next_nodes = IP_LOOKUP_N_NEXT, .next_nodes = IP4_LOOKUP_NEXT_NODES, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -266,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { .name = "ip4-load-balance", @@ -274,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) = .sibling_of = "ip4-lookup", .format_trace = format_ip4_lookup_trace, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /* get first interface address */ @@ -286,7 +284,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, ip_interface_address_t *ia = 0; ip4_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */ , @@ -296,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, result = a; break; })); - /* *INDENT-OFF* */ if (result_ia) *result_ia = result ? 
ia : 0; return result; @@ -655,7 +651,10 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, error = vnet_sw_interface_supports_addressing (vnm, sw_if_index); if (error) - return error; + { + vnm->api_errno = VNET_API_ERROR_UNSUPPORTED; + return error; + } ip4_addr_fib_init (&ip4_af, address, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); @@ -666,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. */ - /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict @@ -727,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, } } } - /* *INDENT-ON* */ if_address_index = ip_interface_address_find (lm, addr_fib, address_length); @@ -848,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable) * when directed broadcast is enabled, the subnet braodcast route will forward * packets using an adjacency with a broadcast MAC. 
otherwise it drops */ - /* *INDENT-OFF* */ foreach_ip_interface_address(&im->lookup_main, ia, sw_if_index, 0, ({ @@ -872,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable) &pfx, sw_if_index); } })); - /* *INDENT-ON* */ } #endif @@ -892,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -906,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) im, fib_index, a, ia->address_length); })); - /* *INDENT-ON* */ return 0; } @@ -914,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); /* Built-in ip4 unicast rx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = { .arc_name = "ip4-unicast", @@ -1053,7 +1045,6 @@ VNET_FEATURE_INIT (ip4_interface_output, static) = .node_name = "interface-output", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static clib_error_t * ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) @@ -1078,14 +1069,21 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vlib_main_t *vm = vlib_get_main (); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); })); - /* *INDENT-ON* */ ip4_mfib_interface_enable_disable (sw_if_index, 0); + + if (0 != im4->fib_index_by_sw_if_index[sw_if_index]) + fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0); + if (0 != im4->mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0); + 
+ /* Erase the lookup tables just in case */ + im4->fib_index_by_sw_if_index[sw_if_index] = ~0; + im4->mfib_index_by_sw_if_index[sw_if_index] = ~0; } vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, @@ -1192,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "%U%U", - format_white_space, indent, - format_ip4_header, t->packet_data, sizeof (t->packet_data)); + + s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent, + t->fib_index, t->dpo_index, t->flow_hash); + s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header, + t->packet_data, sizeof (t->packet_data)); return s; } #endif @@ -1383,14 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) } #endif -/* *INDENT-OFF* */ -VNET_FEATURE_ARC_INIT (ip4_local) = -{ - .arc_name = "ip4-local", - .start_nodes = VNET_FEATURES ("ip4-local"), +VNET_FEATURE_ARC_INIT (ip4_local) = { + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"), .last_in_arc = "ip4-local-end-of-arc", }; -/* *INDENT-ON* */ static inline void ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p, @@ -1466,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b, if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0]) || ip4_local_need_csum_check (is_tcp_udp[1], b[1]))) { - if (is_tcp_udp[0]) + if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0])) ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0], &good_tcp_udp[0]); - if (is_tcp_udp[1]) + if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1])) ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1], &good_tcp_udp[1]); } @@ -1495,9 +1492,8 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node, 
next_index = *next; if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) { - vnet_feature_arc_start (arc_index, - vnet_buffer (b)->sw_if_index[VLIB_RX], - &next_index, b); + vnet_feature_arc_start ( + arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b); *next = next_index; } } @@ -1505,15 +1501,18 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node, typedef struct { + /* The src and fib-index together determine if packet n is the same as n-1 */ ip4_address_t src; + u32 fib_index; u32 lbi; u8 error; u8 first; } ip4_local_last_check_t; static inline void -ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, - ip4_local_last_check_t * last_check, u8 * error0) +ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0, + ip4_local_last_check_t *last_check, u8 *error0, + int is_receive_dpo) { const dpo_id_t *dpo0; load_balance_t *lb0; @@ -1523,13 +1522,23 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ? vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index; + vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + if (is_receive_dpo) + { + receive_dpo_t *rd; + rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]); + if (rd->rd_sw_if_index != ~0) + vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index; + } + /* * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the * adjacency for the destination address (the local interface address). 
* vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the * adjacency for the source address (the remote sender's address) */ - if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) || + if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) || + (last_check->fib_index != vnet_buffer (b)->ip.fib_index) || last_check->first) { lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index, @@ -1565,6 +1574,7 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, last_check->lbi = lbi0; last_check->error = *error0; last_check->first = 0; + last_check->fib_index = vnet_buffer (b)->ip.fib_index; } else { @@ -1576,8 +1586,9 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, } static inline void -ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip, - ip4_local_last_check_t * last_check, u8 * error) +ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip, + ip4_local_last_check_t *last_check, u8 *error, + int is_receive_dpo) { const dpo_id_t *dpo[2]; load_balance_t *lb[2]; @@ -1598,6 +1609,24 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip, vnet_buffer (b[1])->sw_if_index[VLIB_TX] : vnet_buffer (b[1])->ip.fib_index; + not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index; + not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index; + + vnet_buffer (b[0])->ip.rx_sw_if_index = + vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + vnet_buffer (b[1])->ip.rx_sw_if_index = + vnet_buffer (b[1])->sw_if_index[VLIB_RX]; + if (is_receive_dpo) + { + const receive_dpo_t *rd0, *rd1; + rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); + rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]); + if (rd0->rd_sw_if_index != ~0) + vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index; + if (rd1->rd_sw_if_index != ~0) + vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index; + } + /* * vnet_buffer()->ip.adj_index[VLIB_RX] will be 
set to the index of the * adjacency for the destination address (the local interface address). @@ -1644,6 +1673,7 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip, last_check->lbi = lbi[1]; last_check->error = error[1]; last_check->first = 0; + last_check->fib_index = vnet_buffer (b[1])->ip.fib_index; } else { @@ -1694,9 +1724,9 @@ ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next) } static inline uword -ip4_local_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int head_of_feature_arc) +ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, int head_of_feature_arc, + int is_receive_dpo) { u32 *from, n_left_from; vlib_node_runtime_t *error_node = @@ -1713,10 +1743,11 @@ ip4_local_inline (vlib_main_t * vm, * member to make sure the .lbi is initialised for the first * packet. */ - .src = {.as_u32 = 0}, + .src = { .as_u32 = 0 }, .lbi = ~0, .error = IP4_ERROR_UNKNOWN_PROTOCOL, .first = 1, + .fib_index = 0, }; from = vlib_frame_vector_args (frame); @@ -1761,19 +1792,21 @@ ip4_local_inline (vlib_main_t * vm, if (PREDICT_TRUE (not_batch == 0)) { ip4_local_check_l4_csum_x2 (vm, b, ip, error); - ip4_local_check_src_x2 (b, ip, &last_check, error); + ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo); } else { if (!pt[0]) { ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); - ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, &error[0], + is_receive_dpo); } if (!pt[1]) { ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]); - ip4_local_check_src (b[1], ip[1], &last_check, &error[1]); + ip4_local_check_src (b[1], ip[1], &last_check, &error[1], + is_receive_dpo); } } @@ -1801,7 +1834,8 @@ ip4_local_inline (vlib_main_t * vm, goto skip_check; ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); - ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, 
&error[0], + is_receive_dpo); skip_check: @@ -1820,17 +1854,17 @@ ip4_local_inline (vlib_main_t * vm, VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */, + 0 /* is_receive_dpo */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_node) = { .name = "ip4-local", .vector_size = sizeof (u32), .format_trace = format_ip4_forward_next_trace, .n_errors = IP4_N_ERROR, - .error_strings = ip4_error_strings, + .error_counters = ip4_error_counters, .n_next_nodes = IP_LOCAL_N_NEXT, .next_nodes = { @@ -1838,20 +1872,32 @@ VLIB_REGISTER_NODE (ip4_local_node) = [IP_LOCAL_NEXT_PUNT] = "ip4-punt", [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input", - [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly", + [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly", }, }; -/* *INDENT-ON* */ +VLIB_NODE_FN (ip4_receive_local_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */, + 1 /* is_receive_dpo */); +} + +VLIB_REGISTER_NODE (ip4_receive_local_node) = { + .name = "ip4-receive", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local" +}; VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ ); + return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */, + 0 /* is_receive_dpo */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = { .name = "ip4-local-end-of-arc", .vector_size = sizeof (u32), @@ -1865,7 +1911,6 @@ VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { .node_name = "ip4-local-end-of-arc", .runs_before = 0, /* not before any other features */ 
}; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT void @@ -1928,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm, * 47 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_local, static) = { .path = "show ip local", .function = show_ip_local_command_fn, .short_help = "show ip local", }; -/* *INDENT-ON* */ typedef enum { @@ -2002,7 +2045,9 @@ ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip) ttl += 1; ip->ttl = ttl; - ASSERT (ip4_header_checksum_is_valid (ip)); + ASSERT (ip4_header_checksum_is_valid (ip) || + (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) || + (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)); } /* Decrement TTL & update checksum. @@ -2180,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, adj0->ia_cfg_index); next[0] = next_index; - if (is_midchain) - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); } else { @@ -2205,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, &next_index, b[1], adj1->ia_cfg_index); next[1] = next_index; - if (is_midchain) - vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); } else { @@ -2357,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_midchain) { - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); - /* Guess we are only writing on ipv4 header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); } @@ -2463,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (is_midchain) { - /* this acts on the packet that is about to be encapped */ - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ ); - /* Guess we are only writing on ipv4 header. 
*/ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); } @@ -2593,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 1, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .name = "ip4-rewrite", .vector_size = sizeof (u32), @@ -2638,7 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { .format_trace = format_ip4_rewrite_trace, .sibling_of = "ip4-rewrite", }; -/* *INDENT-ON */ static clib_error_t * set_ip_flow_hash_command_fn (vlib_main_t * vm, @@ -2770,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm, * [0] [@0]: dpo-drop ip6 * @cliexend ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = -{ +VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = { .path = "set ip flow-hash", - .short_help = - "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]", + .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] " + "[dport] [proto] [reverse] [gtpv1teid]", .function = set_ip_flow_hash_command_fn, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -2895,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm, * Example of how to assign a classification table to an interface: * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip_classify_command, static) = { .path = "set ip classify", @@ -2903,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = "set ip classify intfc <interface> table-index <classify-idx>", .function = set_ip_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h index 00a47125b8a..b4fcebc9896 100644 --- a/src/vnet/ip/ip4_inlines.h +++ b/src/vnet/ip/ip4_inlines.h @@ -42,6 +42,8 @@ #include <vnet/ip/ip_flow_hash.h> #include <vnet/ip/ip4_packet.h> +#include <vnet/tcp/tcp_packet.h> +#include 
<vnet/udp/udp_packet.h> #define IP_DF 0x4000 /* don't fragment */ @@ -52,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip, flow_hash_config_t flow_hash_config) { tcp_header_t *tcp = (void *) (ip + 1); + udp_header_t *udp = (void *) (ip + 1); + gtpv1u_header_t *gtpu = (void *) (udp + 1); u32 a, b, c, t1, t2; - uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP - || ip->protocol == IP_PROTOCOL_UDP); + uword is_udp = ip->protocol == IP_PROTOCOL_UDP; + uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp); t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? ip->src_address.data_u32 : 0; @@ -89,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? (t1 << 16) | t2 : (t2 << 16) | t1; + if (PREDICT_TRUE (is_udp) && + PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) && + udp->dst_port == GTPV1_PORT_BE)) + { + t1 = gtpu->teid; + c ^= t1; + } a ^= ip_flow_hash_router_id; hash_v3_mix32 (a, b, c); @@ -98,9 +109,9 @@ ip4_compute_flow_hash (const ip4_header_t * ip, } always_inline void * -vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b, - ip4_address_t * src, ip4_address_t * dst, - int proto, u8 csum_offload, u8 is_df) +vlib_buffer_push_ip4_custom (vlib_main_t *vm, vlib_buffer_t *b, + ip4_address_t *src, ip4_address_t *dst, int proto, + u8 csum_offload, u8 is_df, u8 dscp) { ip4_header_t *ih; @@ -108,7 +119,8 @@ vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b, ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); ih->ip_version_and_header_length = 0x45; - ih->tos = 0; + ip4_header_set_dscp (ih, dscp); + ip4_header_set_ecn (ih, 0); ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); /* No fragments */ @@ -152,7 +164,7 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b, u8 csum_offload) { return vlib_buffer_push_ip4_custom (vm, b, src, dst, proto, csum_offload, - 
1 /* is_df */ ); + 1 /* is_df */, 0); } #endif /* included_ip_ip4_inlines_h */ diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index 3b3edf9fca7..106d17da3cb 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -374,22 +374,13 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm, return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0); } -#ifndef CLIB_MARCH_VARIANT -char *ip4_error_strings[] = { -#define _(sym,string) string, - foreach_ip4_error -#undef _ -}; -#endif - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_input_node) = { .name = "ip4-input", .vector_size = sizeof (u32), .protocol_hint = VLIB_NODE_PROTO_HINT_IP4, .n_errors = IP4_N_ERROR, - .error_strings = ip4_error_strings, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_INPUT_N_NEXT, .next_nodes = { @@ -399,7 +390,6 @@ VLIB_REGISTER_NODE (ip4_input_node) = { [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup", [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [IP4_INPUT_NEXT_REASSEMBLY] = "ip4-full-reassembly", }, .format_buffer = format_ip4_header, @@ -414,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_input_trace, }; -/* *INDENT-ON* */ static clib_error_t * ip4_init (vlib_main_t * vm) diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h index 383ef31758c..d2ed13fa35f 100644 --- a/src/vnet/ip/ip4_input.h +++ b/src/vnet/ip/ip4_input.h @@ -42,6 +42,7 @@ #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> +#include <vppinfra/vector/ip_csum.h> typedef enum { @@ -51,7 +52,6 @@ typedef enum IP4_INPUT_NEXT_LOOKUP, IP4_INPUT_NEXT_LOOKUP_MULTICAST, IP4_INPUT_NEXT_ICMP_ERROR, - IP4_INPUT_NEXT_REASSEMBLY, IP4_INPUT_N_NEXT, } ip4_input_next_t; @@ -60,18 +60,21 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum) { if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45)) { - if 
((ip->ip_version_and_header_length & 0xf) != 5) + if ((ip->ip_version_and_header_length & 0xf0) != 0x40) + *error = IP4_ERROR_VERSION; + else if ((ip->ip_version_and_header_length & 0x0f) < 5) + *error = IP4_ERROR_HDR_TOO_SHORT; + else { *error = IP4_ERROR_OPTIONS; - if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0) + if (verify_checksum && + clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0) *error = IP4_ERROR_BAD_CHECKSUM; } - else - *error = IP4_ERROR_VERSION; } - else - if (PREDICT_FALSE (verify_checksum && - ip_csum (ip, sizeof (ip4_header_t)) != 0)) + else if (PREDICT_FALSE (verify_checksum && + clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) != + 0)) *error = IP4_ERROR_BAD_CHECKSUM; } diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 0f4c47fe11a..00855f7db43 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -91,94 +91,48 @@ ip4_mtrie_leaf_set_next_ply_index (u32 i) return l; } -#ifndef __ALTIVEC__ -#define PLY_X4_SPLAT_INIT(init_x4, init) \ - init_x4 = u32x4_splat (init); -#else -#define PLY_X4_SPLAT_INIT(init_x4, init) \ -{ \ - u32x4_union_t y; \ - y.as_u32[0] = init; \ - y.as_u32[1] = init; \ - y.as_u32[2] = init; \ - y.as_u32[3] = init; \ - init_x4 = y.as_u32x4; \ -} -#endif - -#ifdef CLIB_HAVE_VEC128 -#define PLY_INIT_LEAVES(p) \ -{ \ - u32x4 *l, init_x4; \ - \ - PLY_X4_SPLAT_INIT(init_x4, init); \ - for (l = p->leaves_as_u32x4; \ - l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \ - l += 4) \ - { \ - l[0] = init_x4; \ - l[1] = init_x4; \ - l[2] = init_x4; \ - l[3] = init_x4; \ - } \ -} -#else -#define PLY_INIT_LEAVES(p) \ -{ \ - u32 *l; \ - \ - for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \ - { \ - l[0] = init; \ - l[1] = init; \ - l[2] = init; \ - l[3] = init; \ - } \ -} -#endif - -#define PLY_INIT(p, init, prefix_len, ply_base_len) \ -{ \ - /* \ - * A leaf is 'empty' if it represents a leaf from the covering PLY \ - * i.e. 
if the prefix length of the leaf is less than or equal to \ - * the prefix length of the PLY \ - */ \ - p->n_non_empty_leafs = (prefix_len > ply_base_len ? \ - ARRAY_LEN (p->leaves) : 0); \ - clib_memset (p->dst_address_bits_of_leaves, prefix_len, \ - sizeof (p->dst_address_bits_of_leaves)); \ - p->dst_address_bits_base = ply_base_len; \ - \ - /* Initialize leaves. */ \ - PLY_INIT_LEAVES(p); \ -} - static void ply_8_init (ip4_mtrie_8_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len) { - PLY_INIT (p, init, prefix_len, ply_base_len); + p->n_non_empty_leafs = prefix_len > ply_base_len ? ARRAY_LEN (p->leaves) : 0; + clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + p->dst_address_bits_base = ply_base_len; + + clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves)); } static void ply_16_init (ip4_mtrie_16_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len) { - clib_memset (p->dst_address_bits_of_leaves, prefix_len, - sizeof (p->dst_address_bits_of_leaves)); - PLY_INIT_LEAVES (p); + clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves)); } static ip4_mtrie_leaf_t ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len) { ip4_mtrie_8_ply_t *p; - /* Get cache aligned ply. */ + ip4_mtrie_leaf_t l; + u8 need_barrier_sync = pool_get_will_expand (ip4_ply_pool); + vlib_main_t *vm = vlib_get_main (); + ASSERT (vm->thread_index == 0); + + if (need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + /* Get cache aligned ply. 
*/ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len); - return ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); + l = ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); + + if (need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + + return l; } always_inline ip4_mtrie_8_ply_t * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index ec417c9a9f7..16c524745be 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -65,14 +65,7 @@ typedef struct ip4_mtrie_16_ply_t_ /** * The leaves/slots/buckets to be filed with leafs */ - union - { - ip4_mtrie_leaf_t leaves[PLY_16_SIZE]; - -#ifdef CLIB_HAVE_VEC128 - u32x4 leaves_as_u32x4[PLY_16_SIZE / 4]; -#endif - }; + ip4_mtrie_leaf_t leaves[PLY_16_SIZE]; /** * Prefix length for terminal leaves. @@ -85,17 +78,11 @@ typedef struct ip4_mtrie_16_ply_t_ */ typedef struct ip4_mtrie_8_ply_t_ { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); /** * The leaves/slots/buckets to be filed with leafs */ - union - { - ip4_mtrie_leaf_t leaves[256]; - -#ifdef CLIB_HAVE_VEC128 - u32x4 leaves_as_u32x4[256 / 4]; -#endif - }; + ip4_mtrie_leaf_t leaves[256]; /** * Prefix length for leaves/ply. @@ -113,9 +100,6 @@ typedef struct ip4_mtrie_8_ply_t_ * 'non-empty'. Otherwise it is the value of the cover. */ i32 dst_address_bits_base; - - /* Pad to cache line boundary. 
*/ - u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } ip4_mtrie_8_ply_t; STATIC_ASSERT (0 == sizeof (ip4_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES, diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c index 1b5a7878512..bbe311ffb20 100644 --- a/src/vnet/ip/ip4_options.c +++ b/src/vnet/ip/ip4_options.c @@ -78,10 +78,17 @@ VLIB_NODE_FN (ip4_options_node) (vlib_main_t * vm, { case IP4_ROUTER_ALERT_OPTION: /* + * check the option length + */ + if (options[1] != 4) + break; + /* * if it's an IGMP packet, pass up the local stack */ if (IP_PROTOCOL_IGMP == ip4->protocol) { + ip_lookup_set_buffer_fib_index ( + ip4_main.fib_index_by_sw_if_index, b); next = IP4_OPTIONS_NEXT_LOCAL; } break; @@ -120,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args) return s; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_options_node) = { .name = "ip4-options", .vector_size = sizeof (u32), @@ -133,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_options_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 513a7449b54..269049194e6 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -41,7 +41,6 @@ #define included_ip4_packet_h #include <vnet/ip/ip_packet.h> /* for ip_csum_t */ -#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */ #include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */ #include <vppinfra/warnings.h> /* for WARN_OFF/WARN_ON macro */ @@ -130,19 +129,15 @@ typedef union /* For checksumming we'll want to access IP header in word sized chunks. */ /* For 64 bit machines. */ - /* *INDENT-OFF* */ CLIB_PACKED (struct { u64 checksum_data_64[2]; u32 checksum_data_64_32[1]; }); - /* *INDENT-ON* */ /* For 32 bit machines. 
*/ - /* *INDENT-OFF* */ CLIB_PACKED (struct { u32 checksum_data_32[5]; }); - /* *INDENT-ON* */ } ip4_header_t; /* Value of ip_version_and_header_length for packets w/o options. */ @@ -201,9 +196,7 @@ ip4_next_header (ip4_header_t * i) /* Turn off array bounds check due to ip4_header_t option field operations. */ -/* *INDENT-OFF* */ WARN_OFF(array-bounds) -/* *INDENT-ON* */ static_always_inline u16 ip4_header_checksum_inline (ip4_header_t * i, int with_checksum) @@ -306,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum) return ~((u16) sum); } -/* *INDENT-OFF* */ WARN_ON(array-bounds) -/* *INDENT-ON* */ always_inline u16 ip4_header_checksum (ip4_header_t * i) @@ -476,47 +467,6 @@ ip4_multicast_ethernet_address (u8 * ethernet_address, ethernet_address[5] = d[3]; } -always_inline void -ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) -{ - u32 src0, dst0; - - src0 = ip0->src_address.data_u32; - dst0 = ip0->dst_address.data_u32; - ip0->src_address.data_u32 = dst0; - ip0->dst_address.data_u32 = src0; - - src0 = tcp0->src; - dst0 = tcp0->dst; - tcp0->src = dst0; - tcp0->dst = src0; -} - -always_inline void -ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, - tcp_header_t * tcp0, tcp_header_t * tcp1) -{ - u32 src0, dst0, src1, dst1; - - src0 = ip0->src_address.data_u32; - src1 = ip1->src_address.data_u32; - dst0 = ip0->dst_address.data_u32; - dst1 = ip1->dst_address.data_u32; - ip0->src_address.data_u32 = dst0; - ip1->src_address.data_u32 = dst1; - ip0->dst_address.data_u32 = src0; - ip1->dst_address.data_u32 = src1; - - src0 = tcp0->src; - src1 = tcp1->src; - dst0 = tcp0->dst; - dst1 = tcp1->dst; - tcp0->src = dst0; - tcp1->src = dst1; - tcp0->dst = src0; - tcp1->dst = src1; -} - #endif /* included_ip4_packet_h */ /* diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c index 89803afb9dd..b8cc3304437 100644 --- a/src/vnet/ip/ip4_punt_drop.c +++ b/src/vnet/ip/ip4_punt_drop.c @@ -18,7 +18,6 @@ #include 
<vnet/policer/policer.h> #include <vnet/policer/police_inlines.h> -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_punt) = { .arc_name = "ip4-punt", @@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) = .arc_name = "ip4-drop", .start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"), }; -/* *INDENT-ON* */ extern ip_punt_policer_t ip4_punt_policer_cfg; @@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm, ip4_punt_policer_cfg.policer_index)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_punt_policer_node) = { .name = "ip4-punt-policer", .vector_size = sizeof (u32), @@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = { .node_name = "ip4-punt-policer", .runs_before = VNET_FEATURES("ip4-punt-redirect"), }; -/* *INDENT-ON* */ #define foreach_ip4_punt_redirect_error \ @@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm, FIB_PROTOCOL_IP4)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_punt_redirect_node) = { .name = "ip4-punt-redirect", .vector_size = sizeof (u32), @@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = { .node_name = "ip4-punt-redirect", .runs_before = VNET_FEATURES("error-punt"), }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_feat_arc_ip4_punt.feature_arc_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_drop_node) = { .name = "ip4-drop", @@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = { .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON */ #ifndef CLIB_MARCH_VARIANT void @@ -301,17 +293,17 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_punt_policer_command, static) = { .path = "ip punt policer", .function = ip4_punt_police_cmd, .short_help = "ip punt policer [add|del] 
<index>", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT +static u32 ip4_punt_redirect_enable_counts; + void ip4_punt_redirect_add_paths (u32 rx_sw_if_index, const fib_route_path_t *rpaths) @@ -320,13 +312,16 @@ ip4_punt_redirect_add_paths (u32 rx_sw_if_index, rx_sw_if_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, rpaths); - vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0); + if (1 == ++ip4_punt_redirect_enable_counts) + vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0); } void ip4_punt_redirect_del (u32 rx_sw_if_index) { - vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0); + ASSERT (ip4_punt_redirect_enable_counts); + if (0 == --ip4_punt_redirect_enable_counts) + vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0); ip_punt_redirect_del (FIB_PROTOCOL_IP4, rx_sw_if_index); } @@ -399,14 +394,12 @@ done: * @cliexpar * @cliexcmd{set ip punt policer} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) = { .path = "ip punt redirect", .function = ip4_punt_redirect_cmd, .short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>", }; -/* *INDENT-ON* */ static clib_error_t * ip4_punt_redirect_show_cmd (vlib_main_t * vm, @@ -423,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm, * @cliexpar * @cliexcmd{set ip punt redierect} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) = { .path = "show ip punt redirect", @@ -431,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) = .short_help = "show ip punt redirect", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c index 00ab51e2440..27b2d549ea7 100644 --- a/src/vnet/ip/ip4_source_and_port_range_check.c +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -99,7 +99,9 @@ static inline u32 
check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo, u16 dst_port, u32 next) { +#ifdef CLIB_HAVE_VEC128 u16x8 key = u16x8_splat (dst_port); +#endif int i; if (NULL == ppr_dpo || dst_port == 0) @@ -107,9 +109,20 @@ check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo, for (i = 0; i < ppr_dpo->n_used_blocks; i++) +#ifdef CLIB_HAVE_VEC128 if (!u16x8_is_all_zero ((ppr_dpo->blocks[i].low.as_u16x8 <= key) & (ppr_dpo->blocks[i].hi.as_u16x8 >= key))) return next; +#else + { + for (int j = 0; j < 8; j++) + { + if ((ppr_dpo->blocks[i].low.as_u16[j] <= dst_port) && + (ppr_dpo->blocks[i].hi.as_u16[j] >= dst_port)) + return next; + } + }; +#endif return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; } @@ -550,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm, if this changes can easily make new function */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = { .function = ip4_source_and_port_range_check_rx, .name = "ip4-source-and-port-range-check-rx", @@ -567,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_source_and_port_range_check_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = { .function = ip4_source_and_port_range_check_tx, .name = "ip4-source-and-port-range-check-tx", @@ -586,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_source_and_port_range_check_trace, }; -/* *INDENT-ON* */ int set_ip_source_and_port_range_check (vlib_main_t * vm, @@ -749,7 +758,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm, * @cliexend * * Example of how to enable range checking on TX: - * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7} + * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 + * udp-in-vrf 7} * * Example of 
graph node after range checking is enabled: * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx} @@ -758,7 +768,7 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm, * interface-output [1] * @cliexend * - * Example of how to display the features enabed on an interface: + * Example of how to display the features enabled on an interface: * @cliexstart{show ip interface features GigabitEthernet2/0/0} * IP feature paths configured on GigabitEthernet2/0/0... * @@ -783,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = { .path = "set interface ip source-and-port-range-check", .function = set_ip_source_and_port_range_check_fn, .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]", }; -/* *INDENT-ON* */ static u8 * format_ppr_dpo (u8 * s, va_list * args) @@ -1250,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm, * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table: * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = { .path = "set ip source-and-port-range-check", .function = ip_source_and_port_range_check_command_fn, .short_help = "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]", }; -/* *INDENT-ON* */ static clib_error_t * @@ -1367,7 +1373,7 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0} * 172.16.2.0: 23 - 101 * @cliexend - * Example of how to test to determine of a given Pv4 address and port + * Example of how to test to determine of a given iPv4 address and port * 
are being validated: * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23} * 172.16.2.2 port 23 PASS @@ -1376,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, * 172.16.2.2 port 250 FAIL * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = { .path = "show ip source-and-port-range-check", .function = show_source_and_port_range_check_fn, .short_help = "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index a6d87f1f962..57c2b6ff78b 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -28,14 +28,12 @@ typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4, ip6_header_t * ip6, void *ctx); -/* *INDENT-OFF* */ static u8 icmp_to_icmp6_updater_pointer_table[] = { 0, 1, 4, 4, ~0, ~0, ~0, ~0, 7, 6, ~0, ~0, 8, 8, 8, 8, 24, 24, 24, 24 }; -/* *INDENT-ON* */ #define frag_id_4to6(id) (id) diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index f33780f1a98..56eec523d5b 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im, ip_interface_address_t *ia; ip6_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im, break; } })); - /* *INDENT-ON* */ if (result_ia) *result_ia = result ? ia : 0; return result; diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h deleted file mode 100644 index a6fb16570b6..00000000000 --- a/src/vnet/ip/ip6_error.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/ip6_error.h: ip6 fast path errors - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_ip_ip6_error_h -#define included_ip_ip6_error_h - -#define foreach_ip6_error \ - /* Must be first. 
*/ \ - _ (NONE, "valid ip6 packets") \ - \ - /* Errors signalled by ip6-input */ \ - _ (TOO_SHORT, "ip6 length < 40 bytes") \ - _ (BAD_LENGTH, "ip6 length > l2 length") \ - _ (VERSION, "ip6 version != 6") \ - _ (TIME_EXPIRED, "ip6 ttl <= 1") \ - \ - /* Errors signalled by ip6-rewrite. */ \ - _ (MTU_EXCEEDED, "ip6 MTU exceeded") \ - _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \ - _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \ - _ (DROP, "ip6 drop") \ - _ (PUNT, "ip6 punt") \ - \ - /* Errors signalled by ip6-local. */ \ - _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \ - _ (UDP_CHECKSUM, "bad udp checksum") \ - _ (ICMP_CHECKSUM, "bad icmp checksum") \ - _ (UDP_LENGTH, "inconsistent udp/ip lengths") \ - \ - /* Errors signalled by udp6-lookup. */ \ - _ (UNKNOWN_UDP_PORT, "no listener for udp port") \ - \ - /* Spoofed packets in ip6-rewrite-local */ \ - _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \ - \ - /* Erros singalled by ip6-inacl */ \ - _ (INACL_TABLE_MISS, "input ACL table-miss drops") \ - _ (INACL_SESSION_DENY, "input ACL session deny drops") \ - /* Erros singalled by ip6-outacl */ \ - _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \ - _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \ - \ - /* Errors signalled by ip6-reassembly */ \ - _ (REASS_MISSING_UPPER, "missing-upper layer drops") \ - _ (REASS_DUPLICATE_FRAGMENT, "duplicate fragments") \ - _ (REASS_OVERLAPPING_FRAGMENT, "overlapping fragments") \ - _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \ - _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \ - _ (REASS_NO_BUF, "out of buffers (drop)") \ - _ (REASS_TIMEOUT, "fragments dropped due to reassembly timeout") \ - _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \ - _ (REASS_UNSUPP_IP_PROTO, "unsupported ip protocol") - -typedef enum -{ -#define _(sym,str) IP6_ERROR_##sym, - foreach_ip6_error -#undef _ - IP6_N_ERROR, -} ip6_error_t; - -#endif /* 
included_ip_ip6_error_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c index 1b8ff1e0ab0..1a1bef26aa6 100644 --- a/src/vnet/ip/ip6_format.c +++ b/src/vnet/ip/ip6_format.c @@ -288,7 +288,7 @@ format_ip6_header (u8 * s, va_list * args) "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d", format_white_space, indent, traffic_class, flow_label, ip->hop_limit, clib_net_to_host_u16 (ip->payload_length)); - +#if 0 /* Recurse into next protocol layer. */ if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes) { @@ -301,7 +301,7 @@ format_ip6_header (u8 * s, va_list * args) /* next protocol header */ (void *) (ip + 1), max_header_bytes - sizeof (ip[0])); } - +#endif return s; } diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 8daf2614c15..48fb633fd32 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -48,6 +48,7 @@ #include <vnet/fib/ip6_fib.h> #include <vnet/mfib/ip6_mfib.h> #include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/receive_dpo.h> #include <vnet/dpo/classify_dpo.h> #include <vnet/classify/vnet_classify.h> #include <vnet/pg/pg.h> @@ -70,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im, ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; - /* *INDENT-OFF* */ ip_interface_prefix_key_t key = { .prefix = { .fp_len = address_length, @@ -84,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im, }, .sw_if_index = sw_if_index, }; - /* *INDENT-ON* */ /* If prefix already set on interface, just increment ref count & return */ if_prefix = ip_get_interface_prefix (lm, &key); @@ -177,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im, ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; - /* *INDENT-OFF* */ ip_interface_prefix_key_t key = { .prefix = { .fp_len = address_length, @@ -191,13 
+189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im, }, .sw_if_index = sw_if_index, }; - /* *INDENT-ON* */ if_prefix = ip_get_interface_prefix (lm, &key); if (!if_prefix) { clib_warning ("Prefix not found while deleting %U", - format_ip4_address_and_length, address, address_length); + format_ip6_address_and_length, address, address_length); return; } @@ -282,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) ip_interface_address_t *ia = 0; ip6_address_t *result = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */, ({ @@ -290,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) result = a; break; })); - /* *INDENT-ON* */ return result; } @@ -310,7 +305,10 @@ ip6_add_del_interface_address (vlib_main_t * vm, error = vnet_sw_interface_supports_addressing (vnm, sw_if_index); if (error) - return error; + { + vnm->api_errno = VNET_API_ERROR_UNSUPPORTED; + return error; + } if (ip6_address_is_link_local_unicast (address)) { @@ -355,7 +353,6 @@ ip6_add_del_interface_address (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip6_af); - /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict @@ -413,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm, } } } - /* *INDENT-ON* */ if_address_index = ip_interface_address_find (lm, addr_fib, address_length); @@ -533,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ @@ -546,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) ip6_del_interface_routes (sw_if_index, im, fib_index, a, ia->address_length); })); - /* *INDENT-ON* */ return 0; } @@ -554,7 +548,6 @@ ip6_sw_interface_admin_up_down 
(vnet_main_t * vnm, u32 sw_if_index, u32 flags) VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down); /* Built-in ip6 unicast rx feature path definition */ -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip6_unicast, static) = { .arc_name = "ip6-unicast", @@ -679,7 +672,6 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = { .node_name = "interface-output", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ static clib_error_t * ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) @@ -705,14 +697,21 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vlib_main_t *vm = vlib_get_main (); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm6, ia); ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); })); - /* *INDENT-ON* */ ip6_mfib_interface_enable_disable (sw_if_index, 0); + + if (0 != im6->fib_index_by_sw_if_index[sw_if_index]) + fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0); + if (0 != im6->mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0); + + /* Erase the lookup tables just in case */ + im6->fib_index_by_sw_if_index[sw_if_index] = ~0; + im6->mfib_index_by_sw_if_index[sw_if_index] = ~0; } vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index, @@ -735,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm, static u8 *format_ip6_lookup_trace (u8 * s, va_list * args); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_lookup_node) = { .name = "ip6-lookup", @@ -744,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = .n_next_nodes = IP6_LOOKUP_N_NEXT, .next_nodes = IP6_LOOKUP_NEXT_NODES, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -922,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t 
* vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_load_balance_node) = { .name = "ip6-load-balance", @@ -930,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) = .sibling_of = "ip6-lookup", .format_trace = format_ip6_lookup_trace, }; -/* *INDENT-ON* */ typedef struct { @@ -953,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args) ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "%Ufib:%d adj:%d flow:%d", - format_white_space, indent, + s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent, t->fib_index, t->adj_index, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, @@ -1214,23 +1208,17 @@ always_inline u8 ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0, u32 * udp_offset0) { - u32 proto0; - proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0); - if (proto0 != IP_PROTOCOL_UDP) - { - proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0); - proto0 = (proto0 == IP_PROTOCOL_TCP) ? 
proto0 : 0; - } - return proto0; + int nh = ip6_locate_header (p0, ip0, -1, udp_offset0); + if (nh > 0) + if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP) + return nh; + return 0; } -/* *INDENT-OFF* */ -VNET_FEATURE_ARC_INIT (ip6_local) = -{ - .arc_name = "ip6-local", - .start_nodes = VNET_FEATURES ("ip6-local"), +VNET_FEATURE_ARC_INIT (ip6_local) = { + .arc_name = "ip6-local", + .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"), }; -/* *INDENT-ON* */ static_always_inline u8 ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0) @@ -1267,7 +1255,7 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0) } n_bytes_left -= n_this_buffer; - n_bytes_left -= p0->total_length_not_including_first_buffer; + n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length; if (n_bytes_left == 0) return 0; @@ -1275,10 +1263,10 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0) return 1; } - always_inline uword -ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, int head_of_feature_arc) +ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, int head_of_feature_arc, + int is_receive_dpo) { ip6_main_t *im = &ip6_main; ip_lookup_main_t *lm = &im->lookup_main; @@ -1310,7 +1298,7 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_prefetch_buffer_data (b[3], LOAD); } - ip6_error_t error[2]; + vl_counter_ip6_enum_t error[2]; error[0] = IP6_ERROR_UNKNOWN_PROTOCOL; error[1] = IP6_ERROR_UNKNOWN_PROTOCOL; @@ -1466,6 +1454,23 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ? 
vnet_buffer (b[1])->sw_if_index[VLIB_TX] : vnet_buffer (b[1])->ip.fib_index; + + vnet_buffer (b[0])->ip.rx_sw_if_index = + vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + vnet_buffer (b[1])->ip.rx_sw_if_index = + vnet_buffer (b[1])->sw_if_index[VLIB_RX]; + if (is_receive_dpo) + { + const receive_dpo_t *rd0, *rd1; + rd0 = + receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); + rd1 = + receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]); + if (rd0->rd_sw_if_index != ~0) + vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index; + if (rd1->rd_sw_if_index != ~0) + vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index; + } } /* head_of_feature_arc */ next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; @@ -1487,16 +1492,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 next32 = next[0]; vnet_feature_arc_start (arc_index, - vnet_buffer (b[0])->sw_if_index - [VLIB_RX], &next32, b[0]); + vnet_buffer (b[0])->ip.rx_sw_if_index, + &next32, b[0]); next[0] = next32; } if (PREDICT_TRUE (ip6_unknown[1])) { u32 next32 = next[1]; vnet_feature_arc_start (arc_index, - vnet_buffer (b[1])->sw_if_index - [VLIB_RX], &next32, b[1]); + vnet_buffer (b[1])->ip.rx_sw_if_index, + &next32, b[1]); next[1] = next32; } } @@ -1593,6 +1598,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ? 
vnet_buffer (b[0])->sw_if_index[VLIB_TX] : vnet_buffer (b[0])->ip.fib_index; + + vnet_buffer (b[0])->ip.rx_sw_if_index = + vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + if (is_receive_dpo) + { + receive_dpo_t *rd; + rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); + if (rd->rd_sw_if_index != ~0) + vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index; + } } /* head_of_feature_arc */ next[0] = lm->local_next_by_ip_protocol[ip->protocol]; @@ -1607,8 +1622,8 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 next32 = next[0]; vnet_feature_arc_start (arc_index, - vnet_buffer (b[0])->sw_if_index - [VLIB_RX], &next32, b[0]); + vnet_buffer (b[0])->ip.rx_sw_if_index, + &next32, b[0]); next[0] = next32; } } @@ -1626,15 +1641,17 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ ); + return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */, + 0 /* ip6_local_inline */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_local_node) = { .name = "ip6-local", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, .n_next_nodes = IP_LOCAL_N_NEXT, .next_nodes = { @@ -1642,19 +1659,32 @@ VLIB_REGISTER_NODE (ip6_local_node) = [IP_LOCAL_NEXT_PUNT] = "ip6-punt", [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup", [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input", - [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly", + [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly", }, }; -/* *INDENT-ON* */ + +VLIB_NODE_FN (ip6_receive_local_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */, + 1 /* is_receive_dpo */); +} + +VLIB_REGISTER_NODE (ip6_receive_local_node) = { + .name = 
"ip6-receive", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .sibling_of = "ip6-local" +}; VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ ); + return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */, + 0 /* ip6_local_inline */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = { .name = "ip6-local-end-of-arc", .vector_size = sizeof (u32), @@ -1668,7 +1698,6 @@ VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { .node_name = "ip6-local-end-of-arc", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON* */ #ifdef CLIB_MARCH_VARIANT extern vlib_node_registration_t ip6_local_node; @@ -1941,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (is_midchain) { - /* before we paint on the next header, update the L4 - * checksums if required, since there's no offload on a tunnel */ - vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - /* Guess we are only writing on ipv6 header. */ vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ip6_header_t)); @@ -2041,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (is_midchain) { - vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , - 1 /* is_ip6 */ ); - /* Guess we are only writing on ip6 header. 
*/ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t)); } @@ -2193,14 +2212,12 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 1); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_midchain_node) = -{ +VLIB_REGISTER_NODE (ip6_midchain_node) = { .name = "ip6-midchain", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, .sibling_of = "ip6-rewrite", - }; +}; VLIB_REGISTER_NODE (ip6_rewrite_node) = { @@ -2241,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) = .sibling_of = "ip6-rewrite", }; -/* *INDENT-ON* */ /* * Hop-by-Hop handling @@ -2255,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \ _(FORMAT, "incorrectly formatted hop-by-hop options") \ _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options") -/* *INDENT-OFF* */ typedef enum { #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, @@ -2263,7 +2278,6 @@ typedef enum #undef _ IP6_HOP_BY_HOP_N_ERROR, } ip6_hop_by_hop_error_t; -/* *INDENT-ON* */ /* * Primary h-b-h handler trace support @@ -2690,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { .name = "ip6-hop-by-hop", @@ -2702,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = .error_strings = ip6_hop_by_hop_error_strings, .n_next_nodes = 0, }; -/* *INDENT-ON* */ static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) @@ -2954,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, * @cliexend * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = { .path = "set ip6 flow-hash", .short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] " "[dport] [proto] [reverse] [flowlabel]", .function = set_ip6_flow_hash_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_ip6_local_command_fn (vlib_main_t * vm, @@ -3002,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm, * 115 * 
@cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_local, static) = { .path = "show ip6 local", .function = show_ip6_local_command_fn, .short_help = "show ip6 local", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -3121,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm, * Example of how to assign a classification table to an interface: * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = { .path = "set ip6 classify", @@ -3129,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = "set ip6 classify intfc <interface> table-index <classify-idx>", .function = set_ip6_classify_command_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c index e66084c2c4d..412741abcf8 100644 --- a/src/vnet/ip/ip6_hop_by_hop.c +++ b/src/vnet/ip/ip6_hop_by_hop.c @@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = { .name = "ip6-add-hop-by-hop", .vector_size = sizeof (u32), @@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */ #undef _ }, }; -/* *INDENT-ON* */ /* The main h-b-h tracer was already invoked, no need to do much here */ typedef struct @@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = { .name = "ip6-pop-hop-by-hop", @@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = /* See ip/lookup.h */ .n_next_nodes = 0, }; -/* *INDENT-ON* */ typedef struct { @@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm, } #ifndef CLIB_MARCH_VARIANT -/* *INDENT-OFF* */ VLIB_REGISTER_NODE 
(ip6_local_hop_by_hop_node) = { .name = "ip6-local-hop-by-hop", @@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) = [IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ clib_error_t * show_ip6_hbh_command_fn (vlib_main_t * vm, @@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm, * Display ip6 local hop-by-hop next protocol handler nodes * @cliexcmd{show ip6 hbh} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_hbh, static) = { .path = "show ip6 hbh", .short_help = "show ip6 hbh", .function = show_ip6_hbh_command_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_MARCH_VARIANT */ @@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm) return (0); } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) = { .runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"), }; -/* *INDENT-ON* */ void ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index) @@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm, * Example of how to clear iOAM features: * @cliexcmd{clear ioam rewrite} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = { .path = "clear ioam rewrite", .short_help = "clear ioam rewrite", .function = clear_ioam_rewrite_command_fn, }; -/* *INDENT-ON* */ clib_error_t * ip6_ioam_enable (int has_trace_option, int has_pot_option, @@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm, * Example of how to enable trace and pot with ppc set to encap: * @cliexcmd{set ioam rewrite trace pot ppc encap} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = { .path = "set ioam rewrite", .short_help = "set ioam [trace] [pot] [seqno] [analyse]", .function = ip6_set_ioam_rewrite_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, @@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, * EDGE TO EDGE - PPC OPTION - 1 (Encap) * @cliexend ?*/ -/* 
*INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = { .path = "show ioam summary", .short_help = "show ioam summary", .function = ip6_show_ioam_summary_cmd_fn, }; -/* *INDENT-ON* */ void vnet_register_ioam_end_of_path_callback (void *cb) diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h index 2a4bb70573b..9bd475224eb 100644 --- a/src/vnet/ip/ip6_inlines.h +++ b/src/vnet/ip/ip6_inlines.h @@ -49,29 +49,40 @@ always_inline u32 ip6_compute_flow_hash (const ip6_header_t * ip, flow_hash_config_t flow_hash_config) { - tcp_header_t *tcp; + const tcp_header_t *tcp; + const udp_header_t *udp = (void *) (ip + 1); + const gtpv1u_header_t *gtpu = (void *) (udp + 1); u64 a, b, c; u64 t1, t2; + u32 t3; uword is_tcp_udp = 0; u8 protocol = ip->protocol; + uword is_udp = protocol == IP_PROTOCOL_UDP; - if (PREDICT_TRUE - ((ip->protocol == IP_PROTOCOL_TCP) - || (ip->protocol == IP_PROTOCOL_UDP))) + if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp)) { is_tcp_udp = 1; tcp = (void *) (ip + 1); } - else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + else { - ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1); - if ((hbh->protocol == IP_PROTOCOL_TCP) || - (hbh->protocol == IP_PROTOCOL_UDP)) + const void *cur = ip + 1; + if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + const ip6_hop_by_hop_header_t *hbh = cur; + protocol = hbh->protocol; + cur += (hbh->length + 1) * 8; + } + if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + const ip6_fragment_ext_header_t *frag = cur; + protocol = frag->protocol; + } + else if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP) { is_tcp_udp = 1; - tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); + tcp = cur; } - protocol = hbh->protocol; } t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]); @@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip, ((flow_hash_config & IP_FLOW_HASH_FL) ? 
ip6_flow_label_network_order (ip) : 0); c ^= t1; - + if (PREDICT_TRUE (is_udp) && + PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) && + udp->dst_port == GTPV1_PORT_BE)) + { + t3 = gtpu->teid; + a ^= t3; + } hash_mix64 (a, b, c); return (u32) c; } @@ -134,65 +151,17 @@ ip6_compute_flow_hash (const ip6_header_t * ip, * it is a non-first fragment -1 is returned. */ always_inline int -ip6_locate_header (vlib_buffer_t * p0, - ip6_header_t * ip0, int find_hdr_type, u32 * offset) +ip6_locate_header (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type, + u32 *offset) { - u8 next_proto = ip0->protocol; - u8 *next_header; - u8 done = 0; - u32 cur_offset; - u8 *temp_nxthdr = 0; - u32 exthdr_len = 0; - - next_header = ip6_next_header (ip0); - cur_offset = sizeof (ip6_header_t); - while (1) + ip6_ext_hdr_chain_t hdr_chain; + int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain); + if (res >= 0) { - done = (next_proto == find_hdr_type); - if (PREDICT_FALSE - (next_header >= - (u8 *) vlib_buffer_get_current (p0) + p0->current_length)) - { - //A malicious packet could set an extension header with a too big size - return (-1); - } - if (done) - break; - if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT) - { - if (find_hdr_type < 0) - break; - return -1; - } - if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION) - { - ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header; - u16 frag_off = ip6_frag_hdr_offset (frag_hdr); - /* Non first fragment return -1 */ - if (frag_off) - return (-1); - exthdr_len = sizeof (ip6_frag_hdr_t); - temp_nxthdr = next_header + exthdr_len; - } - else if (next_proto == IP_PROTOCOL_IPSEC_AH) - { - exthdr_len = - ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header)); - temp_nxthdr = next_header + exthdr_len; - } - else - { - exthdr_len = - ip6_ext_header_len (((ip6_ext_header_t *) next_header)); - temp_nxthdr = next_header + exthdr_len; - } - next_proto = ((ip6_ext_header_t *) next_header)->next_hdr; - 
next_header = temp_nxthdr; - cur_offset += exthdr_len; + *offset = hdr_chain.eh[res].offset; + return hdr_chain.eh[res].protocol; } - - *offset = cur_offset; - return (next_proto); + return -1; } diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index 01b8f46b4d8..64c9d76ebaa 100644 --- a/src/vnet/ip/ip6_input.c +++ b/src/vnet/ip/ip6_input.c @@ -219,21 +219,12 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return frame->n_vectors; } -#ifndef CLIB_MARCH_VARIANT -char *ip6_error_strings[] = { -#define _(sym,string) string, - foreach_ip6_error -#undef _ -}; -#endif /* CLIB_MARCH_VARIANT */ - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_input_node) = { .name = "ip6-input", .vector_size = sizeof (u32), .n_errors = IP6_N_ERROR, - .error_strings = ip6_error_strings, + .error_counters = ip6_error_counters, .n_next_nodes = IP6_INPUT_N_NEXT, .next_nodes = { @@ -246,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = { .format_buffer = format_ip6_header, .format_trace = format_ip6_input_trace, }; -/* *INDENT-ON* */ static clib_error_t * ip6_init (vlib_main_t * vm) diff --git a/src/vnet/ip/ip6_input.h b/src/vnet/ip/ip6_input.h index fe993caa889..49e37ec1808 100644 --- a/src/vnet/ip/ip6_input.h +++ b/src/vnet/ip/ip6_input.h @@ -43,8 +43,6 @@ #include <vnet/ip/ip.h> #include <vnet/ip/icmp6.h> -extern char *ip6_error_strings[]; - typedef enum { IP6_INPUT_NEXT_DROP, diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c index afa9d8e3ea9..c2a7ccacbc1 100644 --- a/src/vnet/ip/ip6_link.c +++ b/src/vnet/ip/ip6_link.c @@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il) { ip6_link_delegate_t *ild; - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index); })); - /* *INDENT-ON* */ vec_free (il->il_delegates); il->il_delegates = NULL; @@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address) ip6_address_copy (&ilp.ilp_addr, 
address); ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL); - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change) il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index, &il->il_ll_addr); })); - /* *INDENT-ON* */ return (0); } @@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im, if (NULL == il) return; - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE (ild, il, ({ if (is_delete) @@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im, address, address_length); } })); - /* *INDENT-ON* */ } static clib_error_t * @@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm, * Original MAC address: 16:d9:e0:91:79:86 * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_link_command, static) = { .path = "test ip6 link", .function = test_ip6_link_command_fn, .short_help = "test ip6 link <mac-address>", }; -/* *INDENT-ON* */ static u8 * ip6_print_addrs (u8 * s, u32 * addrs) @@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg) if (!ip6_link_is_enabled_i (il)) return (s); - s = format (s, "%U is admin %s\n", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, il->il_sw_if_index), - (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? - "up" : "down")); + s = format ( + s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm, + il->il_sw_if_index, + (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? 
"up" : "down")); u32 ai; u32 *link_scope = 0, *global_scope = 0; @@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg) s = format (s, "%U%U\n", format_white_space, 4, format_ip6_address, &il->il_ll_addr); - /* *INDENT-OFF* */ FOREACH_IP6_LINK_DELEGATE(ild, il, ({ s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format, ild->ild_index, 2); })); - /* *INDENT-ON* */ return (s); } @@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm, * show ip6 interface: IPv6 not enabled on interface * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_link_show_command, static) = { .path = "show ip6 interface", .function = ip6_link_show, .short_help = "show ip6 interface <interface>", }; -/* *INDENT-ON* */ static clib_error_t * enable_ip6_interface_cmd (vlib_main_t * vm, @@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm, * Example of how enable IPv6 on a given interface: * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_ip6_interface_command, static) = { .path = "enable ip6 interface", .function = enable_ip6_interface_cmd, .short_help = "enable ip6 interface <interface>", }; -/* *INDENT-ON* */ static clib_error_t * disable_ip6_interface_cmd (vlib_main_t * vm, @@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm, * Example of how disable IPv6 on a given interface: * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (disable_ip6_interface_command, static) = { .path = "disable ip6 interface", .function = disable_ip6_interface_cmd, .short_help = "disable ip6 interface <interface>", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c index e4010bc43c4..f9172f6c50c 100644 --- a/src/vnet/ip/ip6_ll_table.c +++ b/src/vnet/ip/ip6_ll_table.c @@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index) vnet_main_t *vnm = vnet_get_main (); u8 *desc; - desc = format (NULL, 
"IP6-link-local:%U", - format_vnet_sw_interface_name, - vnm, vnet_get_sw_interface (vnm, sw_if_index)); + desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm, + sw_if_index); ip6_ll_table.ilt_fibs[sw_if_index] = ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND, @@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index) * leave the default route as a drop, but fix fe::/10 to be a glean * via the interface. */ - /* *INDENT-OFF* */ fib_prefix_t pfx = { .fp_proto = FIB_PROTOCOL_IP6, .fp_len = 10, @@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index) 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); - /* *INDENT-ON* */ } static void @@ -111,12 +108,17 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp, .frp_flags = flags, .frp_sw_if_index = ilp->ilp_sw_if_index, .frp_proto = DPO_PROTO_IP6, + .frp_fib_index = ~0, + .frp_weight = 1, }; - fib_prefix_t fp; + fib_prefix_t fp = { 0 }; - vec_validate (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index); + if (flags & FIB_ROUTE_PATH_LOCAL) + rpath.frp_addr.ip6 = ilp->ilp_addr; - if (0 == ip6_ll_fib_get (ilp->ilp_sw_if_index)) + vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0); + + if (~0 == ip6_ll_fib_get (ilp->ilp_sw_if_index)) { ip6_ll_fib_create (ilp->ilp_sw_if_index); } @@ -151,11 +153,12 @@ ip6_ll_table_entry_delete (const ip6_ll_prefix_t * ilp) * if there are no ND sourced prefixes left, then we can clean up this FIB */ fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index); - if (0 == fib_table_get_num_entries (fib_index, - FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND)) + if (~0 != fib_index && + 0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6, + FIB_SOURCE_IP6_ND)) { fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND); - ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = 0; + ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = ~0; } } @@ -273,8 +276,7 @@ ip6_ll_show_fib (vlib_main_t * vm, u8 *s = NULL; fib_index = ip6_ll_table.ilt_fibs[sw_if_index]; - - if (0 == fib_index) + if (~0 == 
fib_index) continue; fib_table = fib_table_get (fib_index, FIB_PROTOCOL_IP6); @@ -345,13 +347,21 @@ ip6_ll_show_fib (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { .path = "show ip6-ll", .short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]", .function = ip6_ll_show_fib, }; -/* *INDENT-ON* */ + +static clib_error_t * +ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add) +{ + vec_validate_init_empty (ip6_ll_table.ilt_fibs, sw_if_index, ~0); + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_ll_sw_interface_add_del); static clib_error_t * ip6_ll_module_init (vlib_main_t * vm) diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c index a7ac164b05a..b074b6e991c 100644 --- a/src/vnet/ip/ip6_ll_types.c +++ b/src/vnet/ip/ip6_ll_types.c @@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args) ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *); vnet_main_t *vnm = vnet_get_main (); - s = format (s, "(%U, %U)", - format_ip6_address, &ilp->ilp_addr, - format_vnet_sw_interface_name, - vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index)); + s = format (s, "(%U, %U)", format_ip6_address, &ilp->ilp_addr, + format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index); return (s); } diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 7a8c31cee48..c506792ddcf 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -40,8 +40,9 @@ #ifndef included_ip6_packet_h #define included_ip6_packet_h -#include <vnet/tcp/tcp_packet.h> +#include <vlib/vlib.h> #include <vnet/ip/ip4_packet.h> +#include <stdbool.h> typedef union { @@ -62,13 +63,11 @@ typedef struct } ip6_address_and_mask_t; /* Packed so that the mhash key doesn't include uninitialized pad bytes */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* IP address must be first for ip_interface_address_get_address() to work */ ip6_address_t ip6_addr; u32 
fib_index; }) ip6_address_fib_t; -/* *INDENT-ON* */ always_inline void ip6_addr_fib_init (ip6_address_fib_t * addr_fib, @@ -424,97 +423,39 @@ ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src) dst->dst_address.as_uword[1] = src->dst_address.as_uword[1]; } -always_inline void -ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) -{ - { - ip6_address_t src0, dst0; - - src0 = ip0->src_address; - dst0 = ip0->dst_address; - ip0->src_address = dst0; - ip0->dst_address = src0; - } - - { - u16 src0, dst0; - - src0 = tcp0->src; - dst0 = tcp0->dst; - tcp0->src = dst0; - tcp0->dst = src0; - } -} - -always_inline void -ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, - tcp_header_t * tcp0, tcp_header_t * tcp1) -{ - { - ip6_address_t src0, dst0, src1, dst1; - - src0 = ip0->src_address; - src1 = ip1->src_address; - dst0 = ip0->dst_address; - dst1 = ip1->dst_address; - ip0->src_address = dst0; - ip1->src_address = dst1; - ip0->dst_address = src0; - ip1->dst_address = src1; - } - - { - u16 src0, dst0, src1, dst1; - - src0 = tcp0->src; - src1 = tcp1->src; - dst0 = tcp0->dst; - dst1 = tcp1->dst; - tcp0->src = dst0; - tcp1->src = dst1; - tcp0->dst = src0; - tcp1->dst = src1; - } -} - - -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 data; }) ip6_pad1_option_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 type; u8 len; u8 data[0]; }) ip6_padN_option_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { #define IP6_MLDP_ALERT_TYPE 0x5 u8 type; u8 len; u16 value; }) ip6_router_alert_option_t; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u8 protocol; + u8 reserved; + u16 fragoff; + u32 id; +}) ip6_fragment_ext_header_t; + typedef CLIB_PACKED (struct { u8 next_hdr; /* Length of this header plus option data in 8 byte units. 
*/ u8 n_data_u64s; }) ip6_ext_header_t; -/* *INDENT-ON* */ #define foreach_ext_hdr_type \ _(IP6_HOP_BY_HOP_OPTIONS) \ _(IPV6_ROUTE) \ - _(IPV6_FRAGMENTATION) \ - _(IPSEC_ESP) \ - _(IPSEC_AH) \ _(IP6_DESTINATION_OPTIONS) \ _(MOBILITY) \ _(HIP) \ @@ -542,15 +483,70 @@ ip6_ext_hdr (u8 nexthdr) #endif } +typedef CLIB_PACKED (struct { + u8 next_hdr; + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; + u8 data[0]; +}) ip6_hop_by_hop_ext_t; + +typedef CLIB_PACKED (struct { + u8 next_hdr; + u8 rsv; + u16 fragment_offset_and_more; + u32 identification; +}) ip6_frag_hdr_t; + +#define ip6_frag_hdr_offset(hdr) \ + (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) >> 3) + +#define ip6_frag_hdr_offset_bytes(hdr) (8 * ip6_frag_hdr_offset (hdr)) + +#define ip6_frag_hdr_more(hdr) \ + (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) & 0x1) + +#define ip6_frag_hdr_offset_and_more(offset, more) \ + clib_host_to_net_u16 (((offset) << 3) + !!(more)) + #define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3) #define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2) +static inline int +ip6_ext_header_len_s (ip_protocol_t nh, void *p) +{ + if (ip6_ext_hdr (nh)) + return ip6_ext_header_len (p); + switch (nh) + { + case IP_PROTOCOL_IPSEC_AH: + return ip6_ext_authhdr_len (p); + case IP_PROTOCOL_IPV6_FRAGMENTATION: + return sizeof (ip6_frag_hdr_t); + case IP_PROTOCOL_ICMP6: + return 4; + case IP_PROTOCOL_UDP: + return 8; + case IP_PROTOCOL_TCP: + return 20; + default: /* Caller is responsible for validating the length of terminating + protocols */ + ; + } + return 0; +} + always_inline void * ip6_ext_next_header (ip6_ext_header_t * ext_hdr) { return (void *) ((u8 *) ext_hdr + ip6_ext_header_len (ext_hdr)); } +always_inline void * +ip6_ext_next_header_offset (void *hdr, u16 offset) +{ + return (hdr + offset); +} + always_inline int vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b, void 
*obj, size_t len) @@ -562,153 +558,144 @@ vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b, return 1; } -/* - * find ipv6 extension header within ipv6 header within buffer b - * - * @param vm - * @param b buffer to limit search to - * @param ip6_header ipv6 header - * @param header_type extension header type to search for - * @param[out] prev_ext_header address of header preceding found header - */ +/* Returns the number of bytes left in buffer from p. */ +static inline u32 +vlib_bytes_left_in_buffer (vlib_buffer_t *b, void *obj) +{ + return b->current_length - (((u8 *) obj - b->data) - b->current_data); +} + always_inline void * -ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6_header, u8 header_type, - ip6_ext_header_t ** prev_ext_header) +ip6_ext_next_header_s (ip_protocol_t cur_nh, void *hdr, u32 max_offset, + u32 *offset, int *res_nh, bool *last) { - ip6_ext_header_t *prev = NULL; - ip6_ext_header_t *result = NULL; - if ((ip6_header)->protocol == header_type) + u16 hdrlen = 0; + int new_nh = -1; + void *res = 0; + if (ip6_ext_hdr (cur_nh)) { - result = (void *) (ip6_header + 1); - if (!vlib_object_within_buffer_data (vm, b, result, - ip6_ext_header_len (result))) - { - result = NULL; - } + hdrlen = ip6_ext_header_len (hdr); + new_nh = ((ip6_ext_header_t *) hdr)->next_hdr; + res = hdr + hdrlen; + } + else if (cur_nh == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) hdr; + if (ip6_frag_hdr_offset (frag_hdr) > 0) + *last = true; + new_nh = frag_hdr->next_hdr; + hdrlen = sizeof (ip6_frag_hdr_t); + res = hdr + hdrlen; + } + else if (cur_nh == IP_PROTOCOL_IPSEC_AH) + { + new_nh = ((ip6_ext_header_t *) hdr)->next_hdr; + hdrlen = ip6_ext_authhdr_len (hdr); + res = hdr + hdrlen; } else { - result = NULL; - prev = (void *) (ip6_header + 1); - while (ip6_ext_hdr (prev->next_hdr) && prev->next_hdr != header_type) - { - prev = ip6_ext_next_header (prev); - if 
(!vlib_object_within_buffer_data (vm, b, prev, - ip6_ext_header_len (prev))) - { - prev = NULL; - break; - } - } - if (prev && (prev->next_hdr == header_type)) - { - result = ip6_ext_next_header (prev); - if (!vlib_object_within_buffer_data (vm, b, result, - ip6_ext_header_len (result))) - { - result = NULL; - } - } + ; } - if (prev_ext_header) + + if (res && (*offset + hdrlen) >= max_offset) { - *prev_ext_header = prev; + return 0; } - return result; + *res_nh = new_nh; + *offset += hdrlen; + return res; } +#define IP6_EXT_HDR_MAX (4) /* Maximum number of headers */ +#define IP6_EXT_HDR_MAX_DEPTH (256) /* Maximum header depth */ +typedef struct +{ + int length; + struct + { + u16 protocol; + u16 offset; + } eh[IP6_EXT_HDR_MAX]; +} ip6_ext_hdr_chain_t; + /* - * walk extension headers, looking for a specific extension header and last - * extension header, calculating length of all extension headers + * Find ipv6 extension header within ipv6 header within + * whichever is smallest of buffer or IP6_EXT_HDR_MAX_DEPTH. + * The complete header chain must be in first buffer. * - * @param vm - * @param b buffer to limit search to - * @param ip6_header ipv6 header - * @param find_hdr extension header to look for (ignored if ext_hdr is NULL) - * @param length[out] length of all extension headers - * @param ext_hdr[out] extension header of type find_hdr (may be NULL) - * @param last_ext_hdr[out] last extension header (may be NULL) - * - * @return 0 on success, -1 on failure (ext headers crossing buffer boundary) + * The complete header chain (up to the terminating header) is + * returned in res. + * Returns the index of the find_hdr_type if > 0. Otherwise + * it returns the index of the last header. 
*/ always_inline int -ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b, - const ip6_header_t * ip6_header, u8 find_hdr, u32 * length, - ip6_ext_header_t ** ext_hdr, - ip6_ext_header_t ** last_ext_hdr) -{ - if (!ip6_ext_hdr (ip6_header->protocol)) - { - *length = 0; - *ext_hdr = NULL; - *last_ext_hdr = NULL; - return 0; - } - *length = 0; - ip6_ext_header_t *h = (void *) (ip6_header + 1); - if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) +ip6_ext_header_walk (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type, + ip6_ext_hdr_chain_t *res) +{ + int i = 0; + int found = -1; + void *next_header = ip6_next_header (ip); + int next_proto = ip->protocol; + res->length = 0; + u32 n_bytes_this_buffer = + clib_min (vlib_bytes_left_in_buffer (b, ip), IP6_EXT_HDR_MAX_DEPTH); + u32 max_offset = clib_min (n_bytes_this_buffer, + sizeof (ip6_header_t) + + clib_net_to_host_u16 (ip->payload_length)); + u32 offset = sizeof (ip6_header_t); + if ((ip6_ext_header_len_s (ip->protocol, next_header) + offset) > max_offset) { return -1; } - *length += ip6_ext_header_len (h); - *last_ext_hdr = h; - *ext_hdr = NULL; - if (ip6_header->protocol == find_hdr) + bool last = false; + while (next_header) { - *ext_hdr = h; + /* Move on to next header */ + res->eh[i].offset = offset; + res->eh[i].protocol = next_proto; + if (next_proto == find_hdr_type) + found = i; + i++; + if (last) + break; + if (i >= IP6_EXT_HDR_MAX) + break; + next_header = ip6_ext_next_header_s (next_proto, next_header, max_offset, + &offset, &next_proto, &last); } - while (ip6_ext_hdr (h->next_hdr)) + res->length = i; + if (find_hdr_type < 0) + return i - 1; + return found != -1 ? 
found : i - 1; +} + +always_inline void * +ip6_ext_header_find (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip, + int find_hdr_type, ip6_ext_header_t **prev_ext_header) +{ + ip6_ext_hdr_chain_t hdr_chain; + int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain); + if (res < 0) + return 0; + + if (prev_ext_header) { - if (h->next_hdr == find_hdr) + if (res > 0) { - h = ip6_ext_next_header (h); - *ext_hdr = h; + *prev_ext_header = + ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset); } else { - h = ip6_ext_next_header (h); + *prev_ext_header = 0; } - if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) - { - return -1; - } - *length += ip6_ext_header_len (h); - *last_ext_hdr = h; } + if (find_hdr_type == hdr_chain.eh[res].protocol) + return ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset); return 0; } -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - u8 next_hdr; - /* Length of this header plus option data in 8 byte units. */ - u8 n_data_u64s; - u8 data[0]; -}) ip6_hop_by_hop_ext_t; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - u8 next_hdr; - u8 rsv; - u16 fragment_offset_and_more; - u32 identification; -}) ip6_frag_hdr_t; -/* *INDENT-ON* */ - -#define ip6_frag_hdr_offset(hdr) \ - (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3) - -#define ip6_frag_hdr_offset_bytes(hdr) \ - (8 * ip6_frag_hdr_offset(hdr)) - -#define ip6_frag_hdr_more(hdr) \ - (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1) - -#define ip6_frag_hdr_offset_and_more(offset, more) \ - clib_host_to_net_u16(((offset) << 3) + !!(more)) - #endif /* included_ip6_packet_h */ /* diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c index 4edb673c3fa..78ca9521f53 100644 --- a/src/vnet/ip/ip6_punt_drop.c +++ b/src/vnet/ip/ip6_punt_drop.c @@ -18,7 +18,6 @@ #include <vnet/policer/policer.h> #include <vnet/policer/police_inlines.h> -/* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip6_punt) = { 
.arc_name = "ip6-punt", @@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) = .arc_name = "ip6-drop", .start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"), }; -/* *INDENT-ON* */ extern ip_punt_policer_t ip6_punt_policer_cfg; @@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_punt_policer_node) = { .name = "ip6-punt-policer", @@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = { .node_name = "ip6-punt-policer", .runs_before = VNET_FEATURES("ip6-punt-redirect") }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_feat_arc_ip6_punt.feature_arc_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_drop_node) = { .name = "ip6-drop", @@ -146,15 +141,11 @@ VLIB_REGISTER_NODE (ip6_drop_node) = }, }; -VLIB_REGISTER_NODE (ip6_not_enabled_node) = -{ +VLIB_REGISTER_NODE (ip6_not_enabled_node) = { .name = "ip6-not-enabled", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, + .sibling_of = "ip6-drop", }; VLIB_REGISTER_NODE (ip6_punt_node) = @@ -179,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = { .node_name = "error-drop", .runs_before = 0, /* not before any other features */ }; -/* *INDENT-ON */ #ifndef CLIB_MARCH_VARIANT void @@ -243,7 +233,6 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) = { .path = "ip6 punt policer", @@ -251,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) = .short_help = "ip6 punt policer [add|del] <index>", }; -/* *INDENT-ON* */ #define foreach_ip6_punt_redirect_error \ _(DROP, "ip6 punt redirect drop") @@ -279,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm, 
FIB_PROTOCOL_IP6)); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_punt_redirect_node) = { .name = "ip6-punt-redirect", .vector_size = sizeof (u32), @@ -301,10 +288,11 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = { .node_name = "ip6-punt-redirect", .runs_before = VNET_FEATURES("error-punt") }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT +static u32 ip6_punt_redirect_enable_counts; + void ip6_punt_redirect_add_paths (u32 rx_sw_if_index, const fib_route_path_t *rpaths) @@ -313,13 +301,16 @@ ip6_punt_redirect_add_paths (u32 rx_sw_if_index, rx_sw_if_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP6, rpaths); - vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0); + if (1 == ++ip6_punt_redirect_enable_counts) + vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0); } void ip6_punt_redirect_del (u32 rx_sw_if_index) { - vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0); + ASSERT (ip6_punt_redirect_enable_counts); + if (0 == --ip6_punt_redirect_enable_counts) + vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0); ip_punt_redirect_del (FIB_PROTOCOL_IP6, rx_sw_if_index); } @@ -392,14 +383,12 @@ done: * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) = { .path = "ip6 punt redirect", .function = ip6_punt_redirect_cmd, .short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>", }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT @@ -420,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm, * @cliexpar * @cliexcmd{set ip punt policer <INDEX>} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) = { .path = "show ip6 punt redirect", @@ -428,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) = .short_help = "show ip6 punt redirect", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git 
a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index 6a533e3b54e..29d5718d4da 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b, ip6_header_t * ip6, ip4_header_t * ip4, void *ctx); -/* *INDENT-OFF* */ static u8 icmp6_to_icmp_updater_pointer_table[] = { 0, 1, ~0, ~0, 2, 2, 9, 8, @@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = 24, 24, 24, 24, 24, 24, 24, 24 }; -/* *INDENT-ON* */ #define frag_id_6to4(id) ((id) ^ ((id) >> 16)) @@ -62,41 +60,25 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = * @returns 0 on success, non-zero value otherwise. */ static_always_inline int -ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6, - u32 buff_len, u8 * l4_protocol, u16 * l4_offset, - u16 * frag_hdr_offset) +ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len, + u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset) { - ip6_ext_header_t *last_hdr, *frag_hdr; - u32 length; - if (ip6_walk_ext_hdr - (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr, - &last_hdr)) + ip6_ext_hdr_chain_t hdr_chain; + int res = + ip6_ext_header_walk (b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain); + if (res < 0) { return -1; } - - if (length > 0) - { - if (frag_hdr) - { - *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6; - } - else - { - *frag_hdr_offset = 0; - } - *l4_protocol = last_hdr->next_hdr; - } + if (hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + *frag_hdr_offset = hdr_chain.eh[res].offset; else - { - *frag_hdr_offset = 0; - *l4_protocol = ip6->protocol; - } - *l4_offset = sizeof (*ip6) + length; + *frag_hdr_offset = 0; - return (buff_len < (*l4_offset + 4)) || - (clib_net_to_host_u16 (ip6->payload_length) < - (*l4_offset + 4 - sizeof (*ip6))); + *l4_protocol = hdr_chain.eh[hdr_chain.length - 1].protocol; + *l4_offset = hdr_chain.eh[hdr_chain.length - 1].offset; + + return 0; } /** @@ 
-124,13 +106,13 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, u16 frag_offset; u8 *l4; - if (ip6_parse - (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) - return 0; - + if (ip6_parse (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, + &frag_offset)) + { + return 0; + } if (frag_offset && - ip6_frag_hdr_offset (((ip6_frag_hdr_t *) - u8_ptr_add (ip6, frag_offset)))) + ip6_frag_hdr_offset (((ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset)))) return 0; //Can't deal with non-first fragment for now if (ip_protocol) diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index f9f9ac783d9..644b4988abc 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (fib_table, ip4_main.fibs) { send_ip_table_details(am, reg, mp->context, fib_table); @@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp) continue; send_ip_table_details(am, reg, mp->context, fib_table); } - /* *INDENT-ON* */ } typedef struct vl_api_ip_fib_dump_walk_ctx_t_ @@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (mfib_table, ip4_main.mfibs) { send_ip_mtable_details (reg, mp->context, mfib_table); @@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp) { send_ip_mtable_details (reg, mp->context, mfib_table); } - /* *INDENT-ON* */ } typedef struct vl_api_ip_mfib_dump_ctx_t_ @@ -514,7 +510,9 @@ vl_api_add_del_ip_punt_redirect_v2_t_handler ( goto out; if (0 != n_paths) - vec_validate (rpaths, n_paths - 1); + { + vec_validate (rpaths, n_paths - 1); + } for (ii = 0; ii < n_paths; ii++) { @@ -601,6 +599,32 @@ ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api) } } +/* + * Returns an unused table id, and ~0 if it can't find one. 
+ */ +u32 +ip_table_get_unused_id (fib_protocol_t fproto) +{ + int i, j; + static u32 seed = 0; + /* limit to 1M tries */ + for (j = 0; j < 1 << 10; j++) + { + seed = random_u32 (&seed); + for (i = 0; i < 1 << 10; i++) + { + /* look around randomly generated id */ + seed += (2 * (i % 2) - 1) * i; + if (seed == ~0) + continue; + if (fib_table_find (fproto, seed) == ~0) + return seed; + } + } + + return ~0; +} + void vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) { @@ -622,6 +646,29 @@ vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY); } +void +vl_api_ip_table_allocate_t_handler (vl_api_ip_table_allocate_t *mp) +{ + vl_api_ip_table_allocate_reply_t *rmp; + fib_protocol_t fproto = + (mp->table.is_ip6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + u32 table_id = ntohl (mp->table.table_id); + int rv = 0; + + if (~0 == table_id) + table_id = ip_table_get_unused_id (fproto); + + if (~0 == table_id) + rv = VNET_API_ERROR_EAGAIN; + else + ip_table_create (fproto, table_id, 1, mp->table.name); + + REPLY_MACRO2 (VL_API_IP_TABLE_ALLOCATE_REPLY, { + clib_memcpy_fast (&rmp->table, &mp->table, sizeof (mp->table)); + rmp->table.table_id = htonl (table_id); + }) +} + static int ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp, u32 * stats_index) { @@ -731,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp) rv = ip_route_add_del_t_handler (mp, &stats_index); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })) - /* *INDENT-ON* */ } void @@ -788,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp) } } - /* *INDENT-OFF* */ REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY, npaths * sizeof (*fp), ({ @@ -808,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp) } } })); - /* *INDENT-ON* */ vec_free (rpaths); } @@ -895,20 +938,14 @@ ip_table_create (fib_protocol_t fproto, 
fib_index = fib_table_find (fproto, table_id); mfib_index = mfib_table_find (fproto, table_id); - if (~0 == fib_index) - { - fib_table_find_or_create_and_lock_w_name (fproto, table_id, - (is_api ? - FIB_SOURCE_API : - FIB_SOURCE_CLI), name); - } - if (~0 == mfib_index) - { - mfib_table_find_or_create_and_lock_w_name (fproto, table_id, - (is_api ? - MFIB_SOURCE_API : - MFIB_SOURCE_CLI), name); - } + /* + * Always try to re-lock in case the fib was deleted by an API call + * but was not yet freed because some other locks were held + */ + fib_table_find_or_create_and_lock_w_name ( + fproto, table_id, (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI), name); + mfib_table_find_or_create_and_lock_w_name ( + fproto, table_id, (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI), name); if ((~0 == fib_index) || (~0 == mfib_index)) call_elf_section_ip_table_callbacks (vnm, table_id, 1 /* is_add */ , @@ -936,9 +973,8 @@ mroute_add_del_handler (u8 is_add, { if (is_add) { - mfib_entry_index = - mfib_table_entry_paths_update (fib_index, prefix, - MFIB_SOURCE_API, rpaths); + mfib_entry_index = mfib_table_entry_paths_update ( + fib_index, prefix, MFIB_SOURCE_API, entry_flags, rpaths); } else { @@ -1005,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) rv = api_mroute_add_del_t_handler (mp, &stats_index); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY, ({ rmp->stats_index = htonl (stats_index); })); - /* *INDENT-ON* */ } static void @@ -1073,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) if (mp->is_ipv6) { - /* *INDENT-OFF* */ /* Do not send subnet details of the IP-interface for * unnumbered interfaces. 
otherwise listening clients * will be confused that the subnet is applied on more @@ -1087,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) }; send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context); })); - /* *INDENT-ON* */ } else { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ fib_prefix_t pfx = { @@ -1102,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context); })); - /* *INDENT-ON* */ } BAD_SW_IF_INDEX_LABEL; @@ -1159,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp) } else { - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)) @@ -1170,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp) mp->context); } } - /* *INDENT-ON* */ } BAD_SW_IF_INDEX_LABEL; @@ -1193,13 +1221,11 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp) /* Gather interfaces. 
*/ sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); - _vec_len (sorted_sis) = 0; - /* *INDENT-OFF* */ + vec_set_len (sorted_sis, 0); pool_foreach (si, im->sw_interfaces) { vec_add1 (sorted_sis, si[0]); } - /* *INDENT-ON* */ vec_foreach (si, sorted_sis) { @@ -1254,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp) } static void +vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp) +{ + vl_api_set_ip_flow_hash_v3_reply_t *rmp; + ip_address_family_t af; + int rv; + + rv = ip_address_family_decode (mp->af, &af); + + if (!rv) + rv = ip_flow_hash_set (af, htonl (mp->table_id), + htonl (mp->flow_hash_config)); + + REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY); +} + +static void vl_api_set_ip_flow_hash_router_id_t_handler ( vl_api_set_ip_flow_hash_router_id_t *mp) { @@ -1663,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp) vnet_sw_interface_t *si; /* Shut down interfaces in this FIB / clean out intfc routes */ - /* *INDENT-OFF* */ pool_foreach (si, im->sw_interfaces) { if (fib_index == fib_table_get_index_for_sw_if_index (fproto, @@ -1674,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp) vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags); } } - /* *INDENT-ON* */ fib_table_flush (fib_index, fproto, FIB_SOURCE_API); mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)), @@ -1831,6 +1871,30 @@ void REPLY_MACRO (VL_API_IP_REASSEMBLY_ENABLE_DISABLE_REPLY); } +void +vl_api_ip_local_reass_enable_disable_t_handler ( + vl_api_ip_local_reass_enable_disable_t *mp) +{ + vl_api_ip_local_reass_enable_disable_reply_t *rmp; + int rv = 0; + + ip4_local_full_reass_enable_disable (mp->enable_ip4); + ip6_local_full_reass_enable_disable (mp->enable_ip6); + + REPLY_MACRO (VL_API_IP_LOCAL_REASS_ENABLE_DISABLE_REPLY); +} + +void +vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp) +{ + vl_api_ip_local_reass_get_reply_t 
*rmp; + int rv = 0; + REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET, { + rmp->ip4_is_enabled = ip4_local_full_reass_enabled (); + rmp->ip6_is_enabled = ip6_local_full_reass_enabled (); + }); +} + static walk_rc_t send_ip_punt_redirect_details (u32 rx_sw_if_index, const ip_punt_redirect_rx_t * ipr, void *arg) @@ -2049,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm) api_main_t *am = vlibapi_get_main (); /* - * Mark the route add/del API as MP safe + * Set up the (msg_name, crc, message-id) table */ - am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL] = 1; - am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_REPLY] = 1; - am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2] = 1; - am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2_REPLY] = 1; + REPLY_MSG_ID_BASE = setup_message_id_table (); /* - * Set up the (msg_name, crc, message-id) table + * Mark the route add/del API as MP safe */ - REPLY_MSG_ID_BASE = setup_message_id_table (); + vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL, + 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1); + vl_api_set_msg_thread_safe ( + am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1); return 0; } diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c index 1ac7248ea05..4fbf1fb74fa 100644 --- a/src/vnet/ip/ip_checksum.c +++ b/src/vnet/ip/ip_checksum.c @@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_checksum, static) = { .path = "test ip checksum", .short_help = "test ip checksum", .function = test_ip_checksum_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_DEBUG */ diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c index 18d07ba6082..1618704e804 100644 --- a/src/vnet/ip/ip_container_proxy.c +++ b/src/vnet/ip/ip_container_proxy.c @@ -138,7 +138,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx) 
}; u32 fib_index; - /* *INDENT-OFF* */ pool_foreach_index (fib_index, ip4_main.fibs) { fib_table_walk (fib_index, FIB_PROTOCOL_IP4, @@ -149,7 +148,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx) fib_table_walk (fib_index, FIB_PROTOCOL_IP6, ip_container_proxy_fib_table_walk, &wctx); } - /* *INDENT-ON* */ } clib_error_t * @@ -216,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_container_command_node, static) = { .path = "ip container", .function = ip_container_cmd, .short_help = "ip container <address> <interface>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ clib_error_t * show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input, @@ -275,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip_container_command, static) = { .path = "show ip container", .function = show_ip_container_cmd_fn, .short_help = "show ip container <address> <interface>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h index bd37ef7307b..30dfcd70a1b 100644 --- a/src/vnet/ip/ip_flow_hash.h +++ b/src/vnet/ip/ip_flow_hash.h @@ -38,7 +38,17 @@ _ (proto, 4, IP_FLOW_HASH_PROTO) \ _ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \ _ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \ - _ (flowlabel, 7, IP_FLOW_HASH_FL) + _ (flowlabel, 7, IP_FLOW_HASH_FL) \ + _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID) + +typedef struct +{ + u8 ver_flags; + u8 type; + u16 length; + u32 teid; +} __attribute__ ((packed)) gtpv1u_header_t; +#define GTPV1_PORT_BE 0x6808 /** * A flow hash configuration is a mask of the flow hash options diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index cafa9a66d6b..934e40a5d18 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -25,10 +25,10 @@ typedef struct { - u8 ipv6; u16 mtu; u8 next; u16 
n_fragments; + u16 pkt_size; } ip_frag_trace_t; static u8 * @@ -37,8 +37,8 @@ format_ip_frag_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *); - s = format (s, "IPv%s mtu: %u fragments: %u next: %d", - t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next); + s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu, + t->pkt_size, t->n_fragments, t->next); return s; } @@ -95,7 +95,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, { vlib_buffer_t *from_b; ip4_header_t *ip4; - u16 len, max, rem, ip_frag_id, ip_frag_offset; + u16 len, max, rem, ip_frag_id, ip_frag_offset, head_bytes; u8 *org_from_packet, more; from_b = vlib_get_buffer (vm, from_bi); @@ -103,9 +103,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize; rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t); - max = - (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - - sizeof (ip4_header_t)) & ~0x7; + head_bytes = sizeof (ip4_header_t) + l2unfragmentablesize; + max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) & + ~0x7; if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t))) @@ -142,8 +142,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, u8 *from_data = (void *) (ip4 + 1); vlib_buffer_t *org_from_b = from_b; u16 fo = 0; - u16 left_in_from_buffer = - from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t)); + u16 left_in_from_buffer = from_b->current_length - head_bytes; u16 ptr = 0; /* Do the actual fragmentation */ @@ -166,8 +165,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, /* Copy ip4 header */ to_data = vlib_buffer_get_current (to_b); - clib_memcpy_fast (to_data, org_from_packet, - l2unfragmentablesize + sizeof 
(ip4_header_t)); + clib_memcpy_fast (to_data, org_from_packet, head_bytes); to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize); to_data = (void *) (to_ip4 + 1); vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data; @@ -213,8 +211,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, } to_b->flags |= VNET_BUFFER_F_IS_IP4; - to_b->current_length = - len + sizeof (ip4_header_t) + l2unfragmentablesize; + to_b->current_length = len + head_bytes; to_ip4->fragment_id = ip_frag_id; to_ip4->flags_and_fragment_offset = @@ -286,7 +283,7 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ip_frag_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); tr->mtu = mtu; - tr->ipv6 = is_ip6 ? 1 : 0; + tr->pkt_size = vlib_buffer_length_in_chain (vm, p0); tr->n_fragments = vec_len (buffer); tr->next = vnet_buffer (p0)->ip_frag.next_index; } @@ -385,13 +382,17 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, ip6_header_t *ip6; u16 len, max, rem, ip_frag_id; u8 *org_from_packet; + u16 head_bytes; from_b = vlib_get_buffer (vm, from_bi); org_from_packet = vlib_buffer_get_current (from_b); ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize; + head_bytes = + (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize); rem = clib_net_to_host_u16 (ip6->payload_length); - max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct?? + max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) & + ~0x7; if (rem > (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t))) @@ -423,9 +424,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, ip6_frag_hdr_t *to_frag_hdr; u8 *to_data; - len = - (rem > - (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem); + len = (rem > max ? 
max : rem); if (len != rem) /* Last fragment does not need to divisible by 8 */ len &= ~0x7; if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0) @@ -438,7 +437,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, /* Copy ip6 header */ clib_memcpy_fast (to_b->data, org_from_packet, l2unfragmentablesize + sizeof (ip6_header_t)); - to_ip6 = vlib_buffer_get_current (to_b); + to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize; to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1); to_data = (void *) (to_frag_hdr + 1); @@ -484,8 +483,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, to_ptr += bytes_to_copy; } - to_b->current_length = - len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t); + to_b->current_length = len + head_bytes; to_ip6->payload_length = clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t)); to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; @@ -502,13 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu, return IP_FRAG_ERROR_NONE; } -static char *ip4_frag_error_strings[] = { -#define _(sym,string) string, - foreach_ip_frag_error -#undef _ -}; - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_frag_node) = { .function = ip4_frag, .name = IP4_FRAG_NODE_NAME, @@ -517,21 +508,17 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = IP_FRAG_N_ERROR, - .error_strings = ip4_frag_error_strings, + .error_counters = ip_frag_error_counters, .n_next_nodes = IP_FRAG_N_NEXT, - .next_nodes = { - [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite", - [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain", - [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [IP_FRAG_NEXT_DROP] = "ip4-drop" - }, + .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite", + [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain", + [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + 
[IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP_FRAG_NEXT_DROP] = "ip4-drop" }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_frag_node) = { .function = ip6_frag, .name = IP6_FRAG_NODE_NAME, @@ -540,19 +527,16 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = IP_FRAG_N_ERROR, - .error_strings = ip4_frag_error_strings, + .error_counters = ip_frag_error_counters, .n_next_nodes = IP_FRAG_N_NEXT, - .next_nodes = { - [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite", - [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain", - [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop", - [IP_FRAG_NEXT_DROP] = "ip6-drop" - }, + .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite", + [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain", + [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop", + [IP_FRAG_NEXT_DROP] = "ip6-drop" }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h index 86462e6c7d2..4ddd62b89e6 100644 --- a/src/vnet/ip/ip_frag.h +++ b/src/vnet/ip/ip_frag.h @@ -36,6 +36,7 @@ #define IP_FRAG_H #include <vnet/vnet.h> +#include <vnet/ip/ip.api_enum.h> #define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header #define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header @@ -57,24 +58,7 @@ typedef enum IP_FRAG_N_NEXT } ip_frag_next_t; -#define foreach_ip_frag_error \ - /* Must be first. 
*/ \ - _(NONE, "packet fragmented") \ - _(SMALL_PACKET, "packet smaller than MTU") \ - _(FRAGMENT_SENT, "number of sent fragments") \ - _(CANT_FRAGMENT_HEADER, "can't fragment header") \ - _(DONT_FRAGMENT_SET, "can't fragment this packet") \ - _(MALFORMED, "malformed packet") \ - _(MEMORY, "could not allocate buffer") \ - _(UNKNOWN, "unknown error") - -typedef enum -{ -#define _(sym,str) IP_FRAG_ERROR_##sym, - foreach_ip_frag_error -#undef _ - IP_FRAG_N_ERROR, -} ip_frag_error_t; +typedef vl_counter_ip_frag_enum_t ip_frag_error_t; void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu, u8 next_index, u8 flags); diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c index a5e652e1ee8..eb3c94a188a 100644 --- a/src/vnet/ip/ip_in_out_acl.c +++ b/src/vnet/ip/ip_in_out_acl.c @@ -32,11 +32,26 @@ format_ip_in_out_acl_trace (u8 * s, u32 is_output, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_in_out_acl_trace_t *t = va_arg (*args, ip_in_out_acl_trace_t *); - - s = format (s, "%s: sw_if_index %d, next_index %d, table %d, offset %d", - is_output ? "OUTACL" : "INACL", - t->sw_if_index, t->next_index, t->table_index, t->offset); - return s; + const vnet_classify_main_t *vcm = &vnet_classify_main; + const u32 indent = format_get_indent (s); + vnet_classify_table_t *table; + vnet_classify_entry_t *e; + + s = + format (s, "%s: sw_if_index %d, next_index %d, table_index %d, offset %d", + is_output ? 
"OUTACL" : "INACL", t->sw_if_index, t->next_index, + t->table_index, t->offset); + + if (pool_is_free_index (vcm->tables, t->table_index)) + return format (s, "\n%Uno table", format_white_space, indent + 4); + + if (~0 == t->offset) + return format (s, "\n%Uno match", format_white_space, indent + 4); + + table = vnet_classify_table_get (t->table_index); + e = vnet_classify_get_entry (table, t->offset); + return format (s, "\n%U%U", format_white_space, indent + 4, + format_classify_entry, table, e); } static u8 * @@ -97,57 +112,40 @@ static char *ip_outacl_error_strings[] = { }; static_always_inline void -ip_in_out_acl_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_buffer_t ** b, - u16 * next, u32 n_left, int is_ip4, int is_output, - int do_trace) +ip_in_out_acl_inline_trace ( + vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, + vlib_buffer_t **b, u16 *next, u32 n_left, u32 *hits__, u32 *misses__, + u32 *chain_hits__, const vlib_error_t error_none, + const vlib_error_t error_deny, const vlib_error_t error_miss, + vnet_classify_table_t *tables, const u32 *table_index_by_sw_if_index, + u32 *fib_index_by_sw_if_index, vnet_config_main_t *cm, + const vlib_rx_or_tx_t way, const int is_output, const int do_trace) { - in_out_acl_main_t *am = &in_out_acl_main; - vnet_classify_main_t *vcm = am->vnet_classify_main; f64 now = vlib_time_now (vm); u32 hits = 0; u32 misses = 0; u32 chain_hits = 0; - in_out_acl_table_id_t tid; - vlib_node_runtime_t *error_node; - u32 n_next_nodes; - + u32 n_next_nodes = node->n_next_nodes; u8 *h[4]; u32 sw_if_index[4]; u32 table_index[4]; vnet_classify_table_t *t[4] = { 0, 0 }; - u64 hash[4]; - - n_next_nodes = node->n_next_nodes; - - if (is_ip4) - { - tid = IN_OUT_ACL_TABLE_IP4; - error_node = vlib_node_get_runtime (vm, ip4_input_node.index); - } - else - { - tid = IN_OUT_ACL_TABLE_IP6; - error_node = vlib_node_get_runtime (vm, ip6_input_node.index); - } + u32 hash[4]; /* calculate hashes for b[0] & b[1] */ if (n_left 
>= 2) { - sw_if_index[2] = - vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; - sw_if_index[3] = - vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + /* ~0 is used as a wildcard to say 'always use sw_if_index 0' + * aka local0. It is used when we do not care about the sw_if_index, as + * when punting */ + sw_if_index[2] = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way]; + sw_if_index[3] = ~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way]; - table_index[2] = - am->classify_table_index_by_sw_if_index[is_output][tid] - [sw_if_index[2]]; - table_index[3] = - am->classify_table_index_by_sw_if_index[is_output][tid] - [sw_if_index[3]]; + table_index[2] = table_index_by_sw_if_index[sw_if_index[2]]; + table_index[3] = table_index_by_sw_if_index[sw_if_index[3]]; - t[2] = pool_elt_at_index (vcm->tables, table_index[2]); - t[3] = pool_elt_at_index (vcm->tables, table_index[3]); + t[2] = pool_elt_at_index (tables, table_index[2]); + t[3] = pool_elt_at_index (tables, table_index[3]); if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) h[2] = @@ -164,16 +162,16 @@ ip_in_out_acl_inline (vlib_main_t * vm, if (is_output) { /* Save the rewrite length, since we are using the l2_classify struct */ - vnet_buffer (b[0])->l2_classify.pad.l2_len = + vnet_buffer (b[0])->l2.l2_len = vnet_buffer (b[0])->ip.save_rewrite_length; /* advance the match pointer so the matching happens on IP header */ - h[2] += vnet_buffer (b[0])->l2_classify.pad.l2_len; + h[2] += vnet_buffer (b[0])->l2.l2_len; /* Save the rewrite length, since we are using the l2_classify struct */ - vnet_buffer (b[1])->l2_classify.pad.l2_len = + vnet_buffer (b[1])->l2.l2_len = vnet_buffer (b[1])->ip.save_rewrite_length; /* advance the match pointer so the matching happens on IP header */ - h[3] += vnet_buffer (b[1])->l2_classify.pad.l2_len; + h[3] += vnet_buffer (b[1])->l2.l2_len; } hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]); @@ -198,7 +196,6 @@ ip_in_out_acl_inline 
(vlib_main_t * vm, { vnet_classify_entry_t *e[2] = { 0, 0 }; u32 _next[2] = { ACL_NEXT_INDEX_DENY, ACL_NEXT_INDEX_DENY }; - u8 error[2]; h[0] = h[2]; h[1] = h[3]; @@ -228,19 +225,15 @@ ip_in_out_acl_inline (vlib_main_t * vm, if (n_left >= 4) { sw_if_index[2] = - vnet_buffer (b[2])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + ~0 == way ? 0 : vnet_buffer (b[2])->sw_if_index[way]; sw_if_index[3] = - vnet_buffer (b[3])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + ~0 == way ? 0 : vnet_buffer (b[3])->sw_if_index[way]; - table_index[2] = - am->classify_table_index_by_sw_if_index[is_output][tid] - [sw_if_index[2]]; - table_index[3] = - am->classify_table_index_by_sw_if_index[is_output][tid] - [sw_if_index[3]]; + table_index[2] = table_index_by_sw_if_index[sw_if_index[2]]; + table_index[3] = table_index_by_sw_if_index[sw_if_index[3]]; - t[2] = pool_elt_at_index (vcm->tables, table_index[2]); - t[3] = pool_elt_at_index (vcm->tables, table_index[3]); + t[2] = pool_elt_at_index (tables, table_index[2]); + t[3] = pool_elt_at_index (tables, table_index[3]); if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) h[2] = @@ -259,16 +252,16 @@ ip_in_out_acl_inline (vlib_main_t * vm, if (is_output) { /* Save the rewrite length, since we are using the l2_classify struct */ - vnet_buffer (b[2])->l2_classify.pad.l2_len = + vnet_buffer (b[2])->l2.l2_len = vnet_buffer (b[2])->ip.save_rewrite_length; /* advance the match pointer so the matching happens on IP header */ - h[2] += vnet_buffer (b[2])->l2_classify.pad.l2_len; + h[2] += vnet_buffer (b[2])->l2.l2_len; /* Save the rewrite length, since we are using the l2_classify struct */ - vnet_buffer (b[3])->l2_classify.pad.l2_len = + vnet_buffer (b[3])->l2.l2_len = vnet_buffer (b[3])->ip.save_rewrite_length; /* advance the match pointer so the matching happens on IP header */ - h[3] += vnet_buffer (b[3])->l2_classify.pad.l2_len; + h[3] += vnet_buffer (b[3])->l2.l2_len; } hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]); 
@@ -292,11 +285,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, } /* find entry for b[0] & b[1] */ - vnet_get_config_data (am->vnet_config_main[is_output][tid], - &b[0]->current_config_index, &_next[0], + vnet_get_config_data (cm, &b[0]->current_config_index, &_next[0], /* # bytes of config data */ 0); - vnet_get_config_data (am->vnet_config_main[is_output][tid], - &b[1]->current_config_index, &_next[1], + vnet_get_config_data (cm, &b[1]->current_config_index, &_next[1], /* # bytes of config data */ 0); if (PREDICT_TRUE (table_index[0] != ~0)) @@ -314,15 +305,8 @@ ip_in_out_acl_inline (vlib_main_t * vm, hits++; - if (is_ip4) - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error[0]]; + b[0]->error = + (_next[0] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none; if (!is_output) { @@ -330,17 +314,22 @@ ip_in_out_acl_inline (vlib_main_t * vm, e[0]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e[0]->metadata; else if (e[0]->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = - e[0]->metadata; + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = + e[0]->metadata; + /* For source check in case we skip the lookup node */ + ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index, + b[0]); + } } } else { while (1) { - if (PREDICT_TRUE (t[0]->next_table_index != ~0)) - t[0] = pool_elt_at_index (vcm->tables, - t[0]->next_table_index); + table_index[0] = t[0]->next_table_index; + if (PREDICT_TRUE (table_index[0] != ~0)) + t[0] = pool_elt_at_index (tables, table_index[0]); else { _next[0] = (t[0]->miss_next_index < n_next_nodes) ? 
@@ -348,15 +337,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, misses++; - if (is_ip4) - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_TABLE_MISS : - IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE; - else - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_TABLE_MISS : - IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error[0]]; + b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ? + error_miss : + error_none; break; } @@ -369,7 +352,7 @@ ip_in_out_acl_inline (vlib_main_t * vm, /* advance the match pointer so the matching happens on IP header */ if (is_output) - h[0] += vnet_buffer (b[0])->l2_classify.pad.l2_len; + h[0] += vnet_buffer (b[0])->l2.l2_len; hash[0] = vnet_classify_hash_packet_inline (t[0], (u8 *) h[0]); @@ -386,15 +369,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, hits++; chain_hits++; - if (is_ip4) - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error[0]]; + b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ? + error_deny : + error_none; if (!is_output) { @@ -406,8 +383,14 @@ ip_in_out_acl_inline (vlib_main_t * vm, e[0]->metadata; else if (e[0]->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = - e[0]->metadata; + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = + e[0]->metadata; + /* For source check in case we skip the lookup + * node */ + ip_lookup_set_buffer_fib_index ( + fib_index_by_sw_if_index, b[0]); + } } break; } @@ -430,15 +413,8 @@ ip_in_out_acl_inline (vlib_main_t * vm, hits++; - if (is_ip4) - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? 
IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[1]->error = error_node->errors[error[1]]; + b[1]->error = + (_next[1] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none; if (!is_output) { @@ -446,17 +422,22 @@ ip_in_out_acl_inline (vlib_main_t * vm, e[1]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) vnet_buffer (b[1])->sw_if_index[VLIB_TX] = e[1]->metadata; else if (e[1]->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = - e[1]->metadata; + { + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = + e[1]->metadata; + /* For source check in case we skip the lookup node */ + ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index, + b[1]); + } } } else { while (1) { - if (PREDICT_TRUE (t[1]->next_table_index != ~0)) - t[1] = pool_elt_at_index (vcm->tables, - t[1]->next_table_index); + table_index[1] = t[1]->next_table_index; + if (PREDICT_TRUE (table_index[1] != ~0)) + t[1] = pool_elt_at_index (tables, table_index[1]); else { _next[1] = (t[1]->miss_next_index < n_next_nodes) ? @@ -464,15 +445,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, misses++; - if (is_ip4) - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_TABLE_MISS : - IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE; - else - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_TABLE_MISS : - IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE; - b[1]->error = error_node->errors[error[1]]; + b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ? 
+ error_miss : + error_none; break; } @@ -485,7 +460,7 @@ ip_in_out_acl_inline (vlib_main_t * vm, /* advance the match pointer so the matching happens on IP header */ if (is_output) - h[1] += vnet_buffer (b[1])->l2_classify.pad.l2_len; + h[1] += vnet_buffer (b[1])->l2.l2_len; hash[1] = vnet_classify_hash_packet_inline (t[1], (u8 *) h[1]); @@ -502,15 +477,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, hits++; chain_hits++; - if (is_ip4) - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[1]->error = error_node->errors[error[1]]; + b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ? + error_deny : + error_none; if (!is_output) { @@ -522,8 +491,14 @@ ip_in_out_acl_inline (vlib_main_t * vm, e[1]->metadata; else if (e[1]->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = - e[1]->metadata; + { + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = + e[1]->metadata; + /* For source check in case we skip the lookup + * node */ + ip_lookup_set_buffer_fib_index ( + fib_index_by_sw_if_index, b[1]); + } } break; } @@ -536,9 +511,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, ip_in_out_acl_trace_t *_t = vlib_add_trace (vm, node, b[0], sizeof (*_t)); _t->sw_if_index = - vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way]; _t->next_index = _next[0]; - _t->table_index = t[0] ? t[0] - vcm->tables : ~0; + _t->table_index = table_index[0]; _t->offset = (e[0] && t[0]) ? vnet_classify_get_offset (t[0], e[0]) : ~0; } @@ -548,9 +523,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, ip_in_out_acl_trace_t *_t = vlib_add_trace (vm, node, b[1], sizeof (*_t)); _t->sw_if_index = - vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + ~0 == way ? 
0 : vnet_buffer (b[1])->sw_if_index[way]; _t->next_index = _next[1]; - _t->table_index = t[1] ? t[1] - vcm->tables : ~0; + _t->table_index = table_index[1]; _t->offset = (e[1] && t[1]) ? vnet_classify_get_offset (t[1], e[1]) : ~0; } @@ -584,15 +559,12 @@ ip_in_out_acl_inline (vlib_main_t * vm, vnet_classify_table_t *t0 = 0; vnet_classify_entry_t *e0 = 0; u32 next0 = ACL_NEXT_INDEX_DENY; - u64 hash0; - u8 error0; + u32 hash0; - sw_if_index0 = - vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; - table_index0 = - am->classify_table_index_by_sw_if_index[is_output][tid][sw_if_index0]; + sw_if_index0 = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way]; + table_index0 = table_index_by_sw_if_index[sw_if_index0]; - t0 = pool_elt_at_index (vcm->tables, table_index0); + t0 = pool_elt_at_index (tables, table_index0); if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) h0 = @@ -603,10 +575,10 @@ ip_in_out_acl_inline (vlib_main_t * vm, if (is_output) { /* Save the rewrite length, since we are using the l2_classify struct */ - vnet_buffer (b[0])->l2_classify.pad.l2_len = + vnet_buffer (b[0])->l2.l2_len = vnet_buffer (b[0])->ip.save_rewrite_length; /* advance the match pointer so the matching happens on IP header */ - h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len; + h0 += vnet_buffer (b[0])->l2.l2_len; } vnet_buffer (b[0])->l2_classify.hash = @@ -615,14 +587,13 @@ ip_in_out_acl_inline (vlib_main_t * vm, vnet_buffer (b[0])->l2_classify.table_index = table_index0; vnet_buffer (b[0])->l2_classify.opaque_index = ~0; - vnet_get_config_data (am->vnet_config_main[is_output][tid], - &b[0]->current_config_index, &next0, + vnet_get_config_data (cm, &b[0]->current_config_index, &next0, /* # bytes of config data */ 0); if (PREDICT_TRUE (table_index0 != ~0)) { hash0 = vnet_buffer (b[0])->l2_classify.hash; - t0 = pool_elt_at_index (vcm->tables, table_index0); + t0 = pool_elt_at_index (tables, table_index0); if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA) h0 
= @@ -633,7 +604,7 @@ ip_in_out_acl_inline (vlib_main_t * vm, /* advance the match pointer so the matching happens on IP header */ if (is_output) - h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len; + h0 += vnet_buffer (b[0])->l2.l2_len; e0 = vnet_classify_find_entry_inline (t0, (u8 *) h0, hash0, now); if (e0) @@ -646,15 +617,8 @@ ip_in_out_acl_inline (vlib_main_t * vm, hits++; - if (is_ip4) - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error0]; + b[0]->error = + (next0 == ACL_NEXT_INDEX_DENY) ? error_deny : error_none; if (!is_output) { @@ -662,16 +626,21 @@ ip_in_out_acl_inline (vlib_main_t * vm, e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e0->metadata; else if (e0->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata; + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata; + /* For source check in case we skip the lookup node */ + ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index, + b[0]); + } } } else { while (1) { - if (PREDICT_TRUE (t0->next_table_index != ~0)) - t0 = - pool_elt_at_index (vcm->tables, t0->next_table_index); + table_index0 = t0->next_table_index; + if (PREDICT_TRUE (table_index0 != ~0)) + t0 = pool_elt_at_index (tables, table_index0); else { next0 = (t0->miss_next_index < n_next_nodes) ? @@ -679,15 +648,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, misses++; - if (is_ip4) - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_TABLE_MISS : - IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE; - else - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? 
IP6_ERROR_OUTACL_TABLE_MISS : - IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error0]; + b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ? + error_miss : + error_none; break; } @@ -700,7 +663,7 @@ ip_in_out_acl_inline (vlib_main_t * vm, /* advance the match pointer so the matching happens on IP header */ if (is_output) - h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len; + h0 += vnet_buffer (b[0])->l2.l2_len; hash0 = vnet_classify_hash_packet_inline (t0, (u8 *) h0); e0 = vnet_classify_find_entry_inline @@ -714,15 +677,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, e0->next_index : next0; hits++; - if (is_ip4) - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP4_ERROR_OUTACL_SESSION_DENY : - IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE; - else - error0 = (next0 == ACL_NEXT_INDEX_DENY) ? - (is_output ? IP6_ERROR_OUTACL_SESSION_DENY : - IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE; - b[0]->error = error_node->errors[error0]; + b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ? + error_deny : + error_none; if (!is_output) { @@ -733,8 +690,14 @@ ip_in_out_acl_inline (vlib_main_t * vm, vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e0->metadata; else if (e0->action == CLASSIFY_ACTION_SET_METADATA) - vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = - e0->metadata; + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = + e0->metadata; + /* For source check in case we skip the lookup + * node */ + ip_lookup_set_buffer_fib_index ( + fib_index_by_sw_if_index, b[0]); + } } break; } @@ -747,9 +710,9 @@ ip_in_out_acl_inline (vlib_main_t * vm, ip_in_out_acl_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t)); t->sw_if_index = - vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX]; + ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way]; t->next_index = next0; - t->table_index = t0 ? t0 - vcm->tables : ~0; + t->table_index = table_index0; t->offset = (e0 && t0) ? 
vnet_classify_get_offset (t0, e0) : ~0; } @@ -767,69 +730,92 @@ ip_in_out_acl_inline (vlib_main_t * vm, n_left--; } - vlib_node_increment_counter (vm, node->node_index, - is_output ? IP_OUTACL_ERROR_MISS : - IP_INACL_ERROR_MISS, misses); - vlib_node_increment_counter (vm, node->node_index, - is_output ? IP_OUTACL_ERROR_HIT : - IP_INACL_ERROR_HIT, hits); - vlib_node_increment_counter (vm, node->node_index, - is_output ? IP_OUTACL_ERROR_CHAIN_HIT : - IP_INACL_ERROR_CHAIN_HIT, chain_hits); + *hits__ = hits; + *misses__ = misses; + *chain_hits__ = chain_hits; } -VLIB_NODE_FN (ip4_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +static_always_inline uword +ip_in_out_acl_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, const in_out_acl_table_id_t tid, + u32 *fib_index_by_sw_if_index, + const vlib_node_registration_t *parent_error_node, + const u32 error_none_index, const u32 error_deny_index, + const u32 error_miss_index, const vlib_rx_or_tx_t way, + const int is_output) { - - u32 *from; + const in_out_acl_main_t *am = &in_out_acl_main; + vnet_classify_table_t *tables = am->vnet_classify_main->tables; + u32 *from = vlib_frame_vector_args (frame); + const u32 *table_index_by_sw_if_index = + am->classify_table_index_by_sw_if_index[is_output][tid]; + vnet_config_main_t *cm = am->vnet_config_main[is_output][tid]; + const vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, parent_error_node->index); + const vlib_error_t error_none = error_node->errors[error_none_index]; + const vlib_error_t error_deny = error_node->errors[error_deny_index]; + const vlib_error_t error_miss = error_node->errors[error_miss_index]; vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; u16 nexts[VLIB_FRAME_SIZE]; - - from = vlib_frame_vector_args (frame); + u32 hits, misses, chain_hits; vlib_get_buffers (vm, from, bufs, frame->n_vectors); - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - ip_in_out_acl_inline (vm, node, bufs, 
nexts, frame->n_vectors, - 1 /* is_ip4 */ , - 0 /* is_output */ , 1 /* is_trace */ ); +#define ip_in_out_acl_inline_trace__(do_trace) \ + ip_in_out_acl_inline_trace ( \ + vm, node, frame, bufs, nexts, frame->n_vectors, &hits, &misses, \ + &chain_hits, error_deny, error_miss, error_none, tables, \ + table_index_by_sw_if_index, fib_index_by_sw_if_index, cm, way, is_output, \ + do_trace) + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + ip_in_out_acl_inline_trace__ (1 /* do_trace */); else - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 1 /* is_ip4 */ , - 0 /* is_output */ , 0 /* is_trace */ ); + ip_in_out_acl_inline_trace__ (0 /* do_trace */); + + vlib_node_increment_counter ( + vm, node->node_index, + is_output ? IP_OUTACL_ERROR_MISS : IP_INACL_ERROR_MISS, misses); + vlib_node_increment_counter ( + vm, node->node_index, is_output ? IP_OUTACL_ERROR_HIT : IP_INACL_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + is_output ? IP_OUTACL_ERROR_CHAIN_HIT : + IP_INACL_ERROR_CHAIN_HIT, + chain_hits); vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); - return frame->n_vectors; } -VLIB_NODE_FN (ip4_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (ip4_inacl_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { - u32 *from; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; - u16 nexts[VLIB_FRAME_SIZE]; - - from = vlib_frame_vector_args (frame); - - vlib_get_buffers (vm, from, bufs, frame->n_vectors); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 1 /* is_ip4 */ , - 1 /* is_output */ , 1 /* is_trace */ ); - else - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 1 /* is_ip4 */ , - 1 /* is_output */ , 0 /* is_trace */ ); + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP4, ip4_main.fib_index_by_sw_if_index, + &ip4_input_node, 
IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY, + IP4_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */); +} - vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); +VLIB_NODE_FN (ip4_punt_acl_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP4_PUNT, + ip4_main.fib_index_by_sw_if_index, &ip4_input_node, IP4_ERROR_NONE, + IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, ~0 /* way */, + 0 /* is_output */); +} - return frame->n_vectors; +VLIB_NODE_FN (ip4_outacl_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP4, NULL, &ip4_input_node, + IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, + VLIB_TX, 1 /* is_output */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_inacl_node) = { .name = "ip4-inacl", .vector_size = sizeof (u32), @@ -843,6 +829,19 @@ VLIB_REGISTER_NODE (ip4_inacl_node) = { }, }; +VLIB_REGISTER_NODE (ip4_punt_acl_node) = { + .name = "ip4-punt-acl", + .vector_size = sizeof (u32), + .format_trace = format_ip_inacl_trace, + .n_errors = ARRAY_LEN(ip_inacl_error_strings), + .error_strings = ip_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "ip4-drop", + }, +}; + VLIB_REGISTER_NODE (ip4_outacl_node) = { .name = "ip4-outacl", .vector_size = sizeof (u32), @@ -855,59 +854,41 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = { [ACL_NEXT_INDEX_DENY] = "ip4-drop", }, }; -/* *INDENT-ON* */ + +VNET_FEATURE_INIT (ip4_punt_acl_feature) = { + .arc_name = "ip4-punt", + .node_name = "ip4-punt-acl", + .runs_after = VNET_FEATURES ("ip4-punt-policer"), +}; VLIB_NODE_FN (ip6_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 *from; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; - u16 nexts[VLIB_FRAME_SIZE]; - - from = vlib_frame_vector_args (frame); - - 
vlib_get_buffers (vm, from, bufs, frame->n_vectors); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 0 /* is_ip4 */ , - 0 /* is_output */ , 1 /* is_trace */ ); - else - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 0 /* is_ip4 */ , - 0 /* is_output */ , 0 /* is_trace */ ); - - vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP6, ip6_main.fib_index_by_sw_if_index, + &ip6_input_node, IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, + IP6_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */); +} - return frame->n_vectors; +VLIB_NODE_FN (ip6_punt_acl_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP6_PUNT, + ip4_main.fib_index_by_sw_if_index, &ip6_input_node, IP6_ERROR_NONE, + IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, ~0 /* way */, + 0 /* is_output */); } VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 *from; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; - u16 nexts[VLIB_FRAME_SIZE]; - - from = vlib_frame_vector_args (frame); - - vlib_get_buffers (vm, from, bufs, frame->n_vectors); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 0 /* is_ip4 */ , - 1 /* is_output */ , 1 /* is_trace */ ); - else - ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors, - 0 /* is_ip4 */ , - 1 /* is_output */ , 0 /* is_trace */ ); - - vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); - - return frame->n_vectors; + return ip_in_out_acl_inline ( + vm, node, frame, IN_OUT_ACL_TABLE_IP6, NULL, &ip6_input_node, + IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, + VLIB_TX, 1 /* is_output */); } -/* *INDENT-OFF* */ 
VLIB_REGISTER_NODE (ip6_inacl_node) = { .name = "ip6-inacl", .vector_size = sizeof (u32), @@ -921,6 +902,19 @@ VLIB_REGISTER_NODE (ip6_inacl_node) = { }, }; +VLIB_REGISTER_NODE (ip6_punt_acl_node) = { + .name = "ip6-punt-acl", + .vector_size = sizeof (u32), + .format_trace = format_ip_inacl_trace, + .n_errors = ARRAY_LEN(ip_inacl_error_strings), + .error_strings = ip_inacl_error_strings, + + .n_next_nodes = ACL_NEXT_INDEX_N_NEXT, + .next_nodes = { + [ACL_NEXT_INDEX_DENY] = "ip6-drop", + }, +}; + VLIB_REGISTER_NODE (ip6_outacl_node) = { .name = "ip6-outacl", .vector_size = sizeof (u32), @@ -933,7 +927,12 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = { [ACL_NEXT_INDEX_DENY] = "ip6-drop", }, }; -/* *INDENT-ON* */ + +VNET_FEATURE_INIT (ip6_punt_acl_feature) = { + .arc_name = "ip6-punt", + .node_name = "ip6-punt-acl", + .runs_after = VNET_FEATURES ("ip6-punt-policer"), +}; #ifndef CLIB_MARCH_VARIANT static clib_error_t * diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c index 8894a878881..c2490f196ef 100644 --- a/src/vnet/ip/ip_init.c +++ b/src/vnet/ip/ip_init.c @@ -104,7 +104,6 @@ do { \ return error; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (ip_main_init) = { .init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init", "icmp4_init", "icmp6_init", "ip6_hop_by_hop_init", @@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = { "in_out_acl_init", "policer_classify_init", "flow_classify_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c index 48c20a6cf34..ca1938f651a 100644 --- a/src/vnet/ip/ip_interface.c +++ b/src/vnet/ip/ip_interface.c @@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4) { ip_lookup_main_t *lm4 = &ip4_main.lookup_main; ip4_address_t *ip4; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , ({ ip4 = ip_interface_address_get_address (lm4, ia); if 
(ip4_address_compare (ip4, &ip->ip4) == 0) return 1; })); - /* *INDENT-ON* */ } else { ip_lookup_main_t *lm6 = &ip6_main.lookup_main; ip6_address_t *ip6; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , ({ ip6 = ip_interface_address_get_address (lm6, ia); if (ip6_address_compare (ip6, &ip->ip6) == 0) return 1; })); - /* *INDENT-ON* */ } return 0; } @@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) if (is_ip4) { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , ({ return ip_interface_address_get_address (lm4, ia); })); - /* *INDENT-ON* */ } else { - /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , ({ ip6_address_t *rv; @@ -197,21 +190,19 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) if (!ip6_address_is_link_local_unicast (rv)) return rv; })); - /* *INDENT-ON* */ } return 0; } -static walk_rc_t -ip_interface_address_mark_one_interface (vnet_main_t * vnm, - vnet_sw_interface_t * si, void *ctx) +walk_rc_t +ip_interface_address_mark_one_interface (vnet_main_t *vnm, + vnet_sw_interface_t *si, void *ctx) { ip_lookup_main_t *lm4 = &ip4_main.lookup_main; ip_lookup_main_t *lm6 = &ip6_main.lookup_main; ip_interface_address_t *ia = 0; - /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ , ({ ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE; @@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t * vnm, ({ ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE; })); - /* *INDENT-ON* */ return (WALK_CONTINUE); } @@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm, u32 *ip4_masks = 0; int i; - /* *INDENT-OFF* */ foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1, ({ if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) @@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm, vec_add1 (ip6_masks, 
ia->address_length); } })); - /* *INDENT-ON* */ for (i = 0; i < vec_len (ip4_addrs); i++) ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i], diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h index b48eebdbc90..f0034ed0314 100644 --- a/src/vnet/ip/ip_interface.h +++ b/src/vnet/ip/ip_interface.h @@ -38,6 +38,9 @@ void ip_interface_address_sweep (void); u32 ip_interface_address_find (ip_lookup_main_t * lm, void *addr_fib, u32 address_length); u8 ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4); +walk_rc_t ip_interface_address_mark_one_interface (vnet_main_t *vnm, + vnet_sw_interface_t *si, + void *ctx); always_inline void * ip_interface_address_get_address (ip_lookup_main_t * lm, @@ -53,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k) return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0; } -/* *INDENT-OFF* */ #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ do { \ vnet_main_t *_vnm = vnet_get_main(); \ @@ -87,7 +89,6 @@ do { \ body; \ } \ } while (0) -/* *INDENT-ON* */ #endif /* included_ip_interface_h */ diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h index b0b5f41260c..04cf9f11d70 100644..100755 --- a/src/vnet/ip/ip_packet.h +++ b/src/vnet/ip/ip_packet.h @@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1); extern u8 *format_ip_ecn (u8 * s, va_list * va); -/* IP checksum support. */ - -static_always_inline u16 -ip_csum (void *data, u16 n_left) -{ - u32 sum; -#ifdef CLIB_HAVE_VEC256 - u16x16 v1, v2; - u32x8 zero = { 0 }; - u32x8 sum8 = { 0 }; - u32x4 sum4; -#endif - - /* if there is odd number of bytes, pad by zero and store in sum */ - sum = (n_left & 1) ? 
((u8 *) data)[n_left - 1] << 8 : 0; - - /* we deal with words */ - n_left >>= 1; - -#ifdef CLIB_HAVE_VEC256 - while (n_left >= 32) - { - v1 = u16x16_load_unaligned (data); - v2 = u16x16_load_unaligned (data + 32); - -#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN - v1 = u16x16_byte_swap (v1); - v2 = u16x16_byte_swap (v2); -#endif - sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1)); - sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1)); - sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2)); - sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2)); - n_left -= 32; - data += 64; - } - - if (n_left >= 16) - { - v1 = u16x16_load_unaligned (data); -#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN - v1 = u16x16_byte_swap (v1); -#endif - sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1)); - sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1)); - n_left -= 16; - data += 32; - } - - if (n_left) - { - v1 = u16x16_load_unaligned (data); -#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN - v1 = u16x16_byte_swap (v1); -#endif - v1 = u16x16_mask_last (v1, 16 - n_left); - sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1)); - sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1)); - } - - sum8 = u32x8_hadd (sum8, zero); - sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8); - sum += sum4[0] + sum4[1]; - -#else - /* scalar version */ - while (n_left >= 8) - { - sum += clib_net_to_host_u16 (*((u16 *) data + 0)); - sum += clib_net_to_host_u16 (*((u16 *) data + 1)); - sum += clib_net_to_host_u16 (*((u16 *) data + 2)); - sum += clib_net_to_host_u16 (*((u16 *) data + 3)); - sum += clib_net_to_host_u16 (*((u16 *) data + 4)); - sum += clib_net_to_host_u16 (*((u16 *) data + 5)); - sum += clib_net_to_host_u16 (*((u16 *) data + 6)); - sum += clib_net_to_host_u16 (*((u16 *) data + 7)); - n_left -= 8; - data += 16; - } - while (n_left) - { - sum += clib_net_to_host_u16 (*(u16 *) data); - n_left -= 1; - data += 2; - } -#endif - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return ~((u16) sum); -} - /* Incremental 
checksum update. */ typedef uword ip_csum_t; @@ -301,6 +209,20 @@ always_inline u16 ip_csum_fold (ip_csum_t c) { /* Reduce to 16 bits. */ +#if defined(__x86_64__) && defined(__BMI2__) + u64 tmp; + asm volatile( + /* using ADC is much faster than mov, shift, add sequence + * compiler produces */ + "mov %k[sum], %k[tmp] \n\t" + "shr $32, %[sum] \n\t" + "add %k[tmp], %k[sum] \n\t" + "mov $16, %k[tmp] \n\t" + "shrx %k[tmp], %k[sum], %k[tmp] \n\t" + "adc %w[tmp], %w[sum] \n\t" + "adc $0, %w[sum] \n\t" + : [ sum ] "+&r"(c), [ tmp ] "=&r"(tmp)); +#else #if uword_bits == 64 c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32); c = (c & 0xffff) + (c >> 16); @@ -308,7 +230,7 @@ ip_csum_fold (ip_csum_t c) c = (c & 0xffff) + (c >> 16); c = (c & 0xffff) + (c >> 16); - +#endif return c; } diff --git a/src/vnet/ip/ip_path_mtu.c b/src/vnet/ip/ip_path_mtu.c index 38adb44065b..ccb57e1e352 100644 --- a/src/vnet/ip/ip_path_mtu.c +++ b/src/vnet/ip/ip_path_mtu.c @@ -297,10 +297,19 @@ ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx) static ip_pmtu_dpo_t * ip_pmtu_dpo_alloc (void) { + vlib_main_t *vm = vlib_get_main (); + u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool); ip_pmtu_dpo_t *ipm; + + if (need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t)); + if (need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + return (ipm); } @@ -353,18 +362,16 @@ ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo) } void -ip_pmtu_dpo_add_or_lock (fib_protocol_t fproto, u16 pmtu, dpo_id_t *dpo) +ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo) { ip_pmtu_dpo_t *ipm; - dpo_id_t parent = DPO_INVALID; ipm = ip_pmtu_dpo_alloc (); - ipm->ipm_proto = fib_proto_to_dpo (fproto); + ipm->ipm_proto = parent->dpoi_proto; ipm->ipm_pmtu = pmtu; - dpo_copy (&parent, drop_dpo_get (ipm->ipm_proto)); - dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, &parent); + dpo_stack (ip_pmtu_dpo_type, 
ipm->ipm_proto, &ipm->ipm_dpo, parent); dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm)); } @@ -516,7 +523,9 @@ ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx, /* * interpose a policy DPO from the nh so that MTU is applied */ - ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo); + ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu, + drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), + &ip_dpo); fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo); @@ -587,7 +596,9 @@ ip_pmtu_stack (ip_pmtu_t *ipt) { dpo_id_t ip_dpo = DPO_INVALID; - ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo); + ip_pmtu_dpo_add_or_lock ( + ipt->ipt_oper_pmtu, + drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo); fib_table_entry_special_dpo_update ( fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo); @@ -826,7 +837,8 @@ ip_path_module_init (vlib_main_t *vm) adj_delegate_register_new_type (&ip_path_adj_delegate_vft); ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_SIMPLE); - ip_pmtu_fib_type = fib_node_register_new_type (&ip_ptmu_fib_node_vft); + ip_pmtu_fib_type = + fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft); ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t)); ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu"); diff --git a/src/vnet/ip/ip_path_mtu.h b/src/vnet/ip/ip_path_mtu.h index 2c54fcd7401..96a5227237a 100644 --- a/src/vnet/ip/ip_path_mtu.h +++ b/src/vnet/ip/ip_path_mtu.h @@ -100,6 +100,9 @@ extern int ip_path_mtu_replace_end (void); extern u32 ip_pmtu_get_table_id (const ip_pmtu_t *ipt); extern void ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip); +extern void ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, + dpo_id_t *dpo); + /** * Data-plane accessor functions */ diff --git a/src/vnet/ip/ip_path_mtu_node.c b/src/vnet/ip/ip_path_mtu_node.c index 
b13f9de849c..cadf1cbe137 100644 --- a/src/vnet/ip/ip_path_mtu_node.c +++ b/src/vnet/ip/ip_path_mtu_node.c @@ -49,7 +49,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, ip_address_family_t af) { u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - u32 frag_sent = 0, small_packets = 0; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -114,8 +113,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (error0 == IP_FRAG_ERROR_NONE) { /* Free original buffer chain */ - frag_sent += vec_len (buffer); - small_packets += (vec_len (buffer) == 1); vlib_buffer_free_one (vm, pi0); /* Free original packet */ } else @@ -176,7 +173,8 @@ VLIB_REGISTER_NODE (ip4_ip_pmtu_dpo_node) = { .name = "ip4-pmtu-dpo", .vector_size = sizeof (u32), .format_trace = format_ip_pmtu_trace, - .n_errors = 0, + .n_errors = IP_FRAG_N_ERROR, + .error_counters = ip_frag_error_counters, .n_next_nodes = IP_PMTU_N_NEXT, .next_nodes = { @@ -187,7 +185,8 @@ VLIB_REGISTER_NODE (ip6_ip_pmtu_dpo_node) = { .name = "ip6-pmtu-dpo", .vector_size = sizeof (u32), .format_trace = format_ip_pmtu_trace, - .n_errors = 0, + .n_errors = IP_FRAG_N_ERROR, + .error_counters = ip_frag_error_counters, .n_next_nodes = IP_PMTU_N_NEXT, .next_nodes = { diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h new file mode 100644 index 00000000000..a80211561b7 --- /dev/null +++ b/src/vnet/ip/ip_psh_cksum.h @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. 
+ */ + +#ifndef included_ip_psh_cksum_h +#define included_ip_psh_cksum_h + +#include <vnet/ip/ip.h> +#include <vppinfra/vector/ip_csum.h> + +typedef struct _ip4_psh +{ + ip4_address_t src; + ip4_address_t dst; + u8 zero; + u8 proto; + u16 l4len; +} ip4_psh_t; + +typedef struct _ip6_psh +{ + ip6_address_t src; + ip6_address_t dst; + u32 l4len; + u32 proto; +} ip6_psh_t; + +STATIC_ASSERT (sizeof (ip4_psh_t) == 12, "ipv4 pseudo header is 12B"); +STATIC_ASSERT (sizeof (ip6_psh_t) == 40, "ipv6 pseudo header is 40B"); + +static_always_inline u16 +ip4_pseudo_header_cksum (ip4_header_t *ip4) +{ + ip4_psh_t psh = { 0 }; + psh.src = ip4->src_address; + psh.dst = ip4->dst_address; + psh.proto = ip4->protocol; + psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (ip4_header_t)); + return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t))); +} + +static_always_inline u16 +ip6_pseudo_header_cksum (ip6_header_t *ip6) +{ + ip6_psh_t psh = { 0 }; + psh.src = ip6->src_address; + psh.dst = ip6->dst_address; + psh.l4len = ip6->payload_length; + psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol); + return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t))); +} + +#endif /* included_ip_psh_cksum_h */ diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c index bf01adadb10..dc113f51386 100644 --- a/src/vnet/ip/ip_punt_drop.c +++ b/src/vnet/ip/ip_punt_drop.c @@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args) rx = ip_punt_redirect_get (rxs[rx_sw_if_index]); - s = format (s, " rx %U via:\n", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, rx_sw_if_index)); + s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm, + rx_sw_if_index); s = format (s, " %U", format_fib_path_list, rx->pl, 2); s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0); s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0); diff --git a/src/vnet/ip/ip_sas.c b/src/vnet/ip/ip_sas.c index 7d3632d95ed..0fc261724f1 100644 --- 
a/src/vnet/ip/ip_sas.c +++ b/src/vnet/ip/ip_sas.c @@ -80,7 +80,12 @@ ip6_sas_by_sw_if_index (u32 sw_if_index, const ip6_address_t *dst, if (ip6_address_is_link_local_unicast (dst) || dst->as_u32[0] == clib_host_to_net_u32 (0xff020000)) { - ip6_address_copy (src, ip6_get_link_local_address (sw_if_index)); + const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index); + if (NULL == ll) + { + return false; + } + ip6_address_copy (src, ll); return true; } diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c index 2de8235288d..727afba67f4 100644 --- a/src/vnet/ip/ip_test.c +++ b/src/vnet/ip/ip_test.c @@ -30,16 +30,20 @@ #include <vnet/format_fns.h> #include <vnet/ip/ip.api_enum.h> #include <vnet/ip/ip.api_types.h> +#include <vlibmemory/vlib.api_types.h> #define vl_endianfun /* define message structures */ #include <vnet/ip/ip.api.h> #undef vl_endianfun +#define vl_calcsizefun +#include <vnet/ip/ip.api.h> +#undef vl_calcsizefun + typedef struct { /* API message ID base */ u16 msg_id_base; - u32 ping_id; vat_main_t *vat_main; } ip_test_main_t; @@ -99,7 +103,7 @@ increment_address (vl_api_address_t *a) static uword unformat_fib_path (unformat_input_t *input, va_list *args) { - vnet_main_t *vnm = va_arg (*args, vnet_main_t *); + vat_main_t *vam = va_arg (*args, vat_main_t *); vl_api_fib_path_t *path = va_arg (*args, vl_api_fib_path_t *); u32 weight, preference; mpls_label_t out_label; @@ -113,14 +117,14 @@ unformat_fib_path (unformat_input_t *input, va_list *args) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "%U %U", unformat_vl_api_ip4_address, - &path->nh.address.ip4, unformat_vnet_sw_interface, vnm, + &path->nh.address.ip4, api_unformat_sw_if_index, vam, &path->sw_if_index)) { path->proto = FIB_API_PATH_NH_PROTO_IP4; } else if (unformat (input, "%U %U", unformat_vl_api_ip6_address, - &path->nh.address.ip6, unformat_vnet_sw_interface, - vnm, &path->sw_if_index)) + &path->nh.address.ip6, api_unformat_sw_if_index, vam, + 
&path->sw_if_index)) { path->proto = FIB_API_PATH_NH_PROTO_IP6; } @@ -237,7 +241,6 @@ unformat_fib_path (unformat_input_t *input, va_list *args) static int api_ip_route_add_del (vat_main_t *vam) { - vnet_main_t *vnm = vnet_get_main (); unformat_input_t *i = vam->input; vl_api_ip_route_add_del_t *mp; u32 vrf_id = 0; @@ -273,7 +276,7 @@ api_ip_route_add_del (vat_main_t *vam) is_multipath = 1; else if (unformat (i, "seed %d", &random_seed)) ; - else if (unformat (i, "via %U", unformat_fib_path, vnm, + else if (unformat (i, "via %U", unformat_fib_path, vam, &paths[path_count])) { path_count++; @@ -524,6 +527,17 @@ api_ip_table_flush (vat_main_t *vam) return ret; } +static int +api_ip_table_allocate (vat_main_t *vam) +{ + return -1; +} + +static void +vl_api_ip_table_allocate_reply_t_handler (vl_api_ip_table_allocate_reply_t *mp) +{ +} + static void vl_api_ip_route_add_del_v2_reply_t_handler ( vl_api_ip_route_add_del_v2_reply_t *mp) @@ -692,7 +706,6 @@ vl_api_ip_punt_redirect_v2_details_t_handler ( static int api_ip_address_dump (vat_main_t *vam) { - vnet_main_t *vnm = vnet_get_main (); unformat_input_t *i = vam->input; vl_api_ip_address_dump_t *mp; vl_api_control_ping_t *mp_ping; @@ -706,8 +719,7 @@ api_ip_address_dump (vat_main_t *vam) { if (unformat (i, "sw_if_index %d", &sw_if_index)) sw_if_index_set = 1; - else if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, - &sw_if_index)) + else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) sw_if_index_set = 1; else if (unformat (i, "ipv4")) ipv4_set = 1; @@ -1002,6 +1014,24 @@ api_ip_reassembly_enable_disable (vat_main_t *vat) return -1; } +static int +api_ip_local_reass_enable_disable (vat_main_t *vat) +{ + return -1; +} + +static int +api_ip_local_reass_get (vat_main_t *vat) +{ + return -1; +} + +static void +vl_api_ip_local_reass_get_reply_t_handler ( + vl_api_ip_local_reass_get_reply_t *mp) +{ +} + static void vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp) { @@ 
-1010,7 +1040,6 @@ vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp) int api_ip_source_and_port_range_check_interface_add_del (vat_main_t *vam) { - vnet_main_t *vnm = vnet_get_main (); unformat_input_t *input = vam->input; vl_api_ip_source_and_port_range_check_interface_add_del_t *mp; u32 sw_if_index = ~0; @@ -1022,8 +1051,7 @@ api_ip_source_and_port_range_check_interface_add_del (vat_main_t *vam) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, - &sw_if_index)) + if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) ; else if (unformat (input, "sw_if_index %d", &sw_if_index)) ; @@ -1202,7 +1230,6 @@ api_ip_mroute_dump (vat_main_t *vam) static int api_sw_interface_ip6_enable_disable (vat_main_t *vam) { - vnet_main_t *vnm = vnet_get_main (); unformat_input_t *i = vam->input; vl_api_sw_interface_ip6_enable_disable_t *mp; u32 sw_if_index; @@ -1212,7 +1239,7 @@ api_sw_interface_ip6_enable_disable (vat_main_t *vam) while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { - if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) sw_if_index_set = 1; else if (unformat (i, "sw_if_index %d", &sw_if_index)) sw_if_index_set = 1; @@ -1250,6 +1277,12 @@ api_set_ip_flow_hash_v2 (vat_main_t *vat) } static int +api_set_ip_flow_hash_v3 (vat_main_t *vat) +{ + return -1; +} + +static int api_ip_mroute_add_del (vat_main_t *vam) { unformat_input_t *i = vam->input; @@ -1536,8 +1569,6 @@ vl_api_ip_details_t_handler (vl_api_ip_details_t *mp) #include <vnet/ip/ip.api_test.c> -VAT_REGISTER_FEATURE_FUNCTION (vat_ip_plugin_register); - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c index 3e5ecebf142..ec80a96f15c 100644 --- a/src/vnet/ip/ip_types.c +++ b/src/vnet/ip/ip_types.c @@ -41,14 +41,16 @@ uword 
unformat_ip_address (unformat_input_t * input, va_list * args) { ip_address_t *a = va_arg (*args, ip_address_t *); + ip_address_t tmp, *p_tmp = &tmp; - clib_memset (a, 0, sizeof (*a)); - if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a))) - ip_addr_version (a) = AF_IP4; - else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a))) - ip_addr_version (a) = AF_IP6; + clib_memset (p_tmp, 0, sizeof (*p_tmp)); + if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp))) + ip_addr_version (p_tmp) = AF_IP4; + else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp))) + ip_addr_version (p_tmp) = AF_IP6; else return 0; + *a = *p_tmp; return 1; } @@ -288,6 +290,13 @@ ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix) } void +ip_address_to_prefix (const ip_address_t *addr, ip_prefix_t *prefix) +{ + prefix->len = (addr->version == AF_IP4 ? 32 : 128); + clib_memcpy (&prefix->addr, addr, sizeof (prefix->addr)); +} + +void ip_address_increment (ip_address_t * ip) { ip46_address_increment ((ip_addr_version (ip) == AF_IP4 ? 
@@ -380,23 +389,24 @@ ip_prefix_copy (void *dst, void *src) } int -ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2) +ip_prefix_cmp (const ip_prefix_t *ipp1, const ip_prefix_t *ipp2) { + ip_prefix_t p1 = *ipp1, p2 = *ipp2; int cmp = 0; - ip_prefix_normalize (p1); - ip_prefix_normalize (p2); + ip_prefix_normalize (&p1); + ip_prefix_normalize (&p2); - cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2)); + cmp = ip_address_cmp (&ip_prefix_addr (&p1), &ip_prefix_addr (&p2)); if (cmp == 0) { - if (ip_prefix_len (p1) < ip_prefix_len (p2)) + if (ip_prefix_len (&p1) < ip_prefix_len (&p2)) { cmp = 1; } else { - if (ip_prefix_len (p1) > ip_prefix_len (p2)) + if (ip_prefix_len (&p1) > ip_prefix_len (&p2)) cmp = 2; } } diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h index 83a0f6adc72..f1b387df194 100644 --- a/src/vnet/ip/ip_types.h +++ b/src/vnet/ip/ip_types.h @@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_ #define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1) -/* *INDENT-OFF* */ typedef struct ip_address { ip46_address_t ip; ip_address_family_t version; } __clib_packed ip_address_t; -/* *INDENT-ON* */ #define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4} #define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6} @@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a, extern void ip_address_increment (ip_address_t * ip); extern void ip_address_reset (ip_address_t * ip); -/* *INDENT-OFF* */ typedef struct ip_prefix { ip_address_t addr; u8 len; } __clib_packed ip_prefix_t; -/* *INDENT-ON* */ #define ip_prefix_addr(_a) (_a)->addr #define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a)) @@ -126,11 +122,13 @@ typedef struct ip_prefix #define ip_prefix_v4(_a) ip_addr_v4(&ip_prefix_addr(_a)) #define ip_prefix_v6(_a) ip_addr_v6(&ip_prefix_addr(_a)) -extern int ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2); +extern int ip_prefix_cmp (const ip_prefix_t *p1, const ip_prefix_t *p2); 
extern void ip_prefix_normalize (ip_prefix_t * a); extern void ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix); +extern void ip_address_to_prefix (const ip_address_t *addr, + ip_prefix_t *prefix); extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp, fib_prefix_t * fibp); extern u8 *format_ip_prefix (u8 * s, va_list * args); diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 1753ffd9232..c225c222a38 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -128,6 +128,42 @@ format_ip_flow_hash_config (u8 * s, va_list * args) return s; } +uword +unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args) +{ + flow_hash_config_t *flow_hash_config = va_arg (*args, flow_hash_config_t *); + uword start_index = unformat_check_input (input); + int matched_once = 0; + + if (unformat (input, "default")) + { + *flow_hash_config = IP_FLOW_HASH_DEFAULT; + return 1; + } + while (!unformat_is_eof (input) && + !is_white_space (unformat_peek_input (input))) + { + if (unformat (input, "%_,")) + ; +#define _(a, b, c) \ + else if (unformat (input, "%_" #a)) \ + { \ + *flow_hash_config |= c; \ + matched_once = 1; \ + } + foreach_flow_hash_bit +#undef _ + else + { + /* Roll back to our start */ + input->index = start_index; + return 0; + } + } + + return matched_once; +} + u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) { @@ -184,6 +220,27 @@ const ip46_address_t zero_addr = { 0, 0}, }; +bool +fib_prefix_validate (const fib_prefix_t *prefix) +{ + if (FIB_PROTOCOL_IP4 == prefix->fp_proto) + { + if (prefix->fp_len > 32) + { + return false; + } + } + + if (FIB_PROTOCOL_IP6 == prefix->fp_proto) + { + if (prefix->fp_len > 128) + { + return false; + } + } + return true; +} + static clib_error_t * vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) @@ -304,22 +361,25 @@ vnet_ip_route_cmd (vlib_main_t * vm, } else if (0 < vec_len (rpaths)) { - u32 k, n, incr; - ip46_address_t 
dst = prefixs[i].fp_addr; + u32 k, n; f64 t[2]; n = count; t[0] = vlib_time_now (vm); - incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) - - prefixs[i].fp_len); for (k = 0; k < n; k++) { fib_prefix_t rpfx = { .fp_len = prefixs[i].fp_len, .fp_proto = prefixs[i].fp_proto, - .fp_addr = dst, + .fp_addr = prefixs[i].fp_addr, }; + if (!fib_prefix_validate (&rpfx)) + { + vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len); + continue; + } + if (is_del) fib_table_entry_path_remove2 (fib_index, &rpfx, FIB_SOURCE_CLI, rpaths); @@ -329,21 +389,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, rpaths); - if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto) - { - dst.ip4.as_u32 = - clib_host_to_net_u32 (incr + - clib_net_to_host_u32 (dst. - ip4.as_u32)); - } - else - { - int bucket = (incr < 64 ? 0 : 1); - dst.ip6.as_u64[bucket] = - clib_host_to_net_u64 (incr + - clib_net_to_host_u64 (dst.ip6.as_u64 - [bucket])); - } + fib_prefix_increment (&prefixs[i]); } t[1] = vlib_time_now (vm); @@ -399,29 +445,35 @@ vnet_ip_table_cmd (vlib_main_t * vm, } } - if (~0 == table_id) - { - error = clib_error_return (0, "No table id"); - goto done; - } - else if (0 == table_id) + if (0 == table_id) { error = clib_error_return (0, "Can't change the default table"); goto done; } else - { - if (is_add) - { - ip_table_create (fproto, table_id, 0, name); - } - else { - ip_table_delete (fproto, table_id, 0); + if (is_add) + { + if (~0 == table_id) + { + table_id = ip_table_get_unused_id (fproto); + vlib_cli_output (vm, "%u\n", table_id); + } + ip_table_create (fproto, table_id, 0, name); + } + else + { + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + ip_table_delete (fproto, table_id, 0); + } } - } done: + vec_free (name); unformat_free (line_input); return error; } @@ -477,13 +529,13 @@ vnet_show_ip_table_cmd (vlib_main_t *vm, unformat_input_t *main_input, } fib = fib_table_get (fib_index, fproto); - 
vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index, + vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index, fib->ft_table_id, fib->ft_desc); } else { pool_foreach (fib, fibs) - vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index, + vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index, fib->ft_table_id, fib->ft_desc); } @@ -505,33 +557,25 @@ vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input, return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { .path = "ip", .short_help = "Internet protocol (IP) commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = { .path = "ip6", .short_help = "Internet protocol version 6 (IPv6) commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = { .path = "show ip", .short_help = "Internet protocol (IP) show commands", }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { .path = "show ip6", .short_help = "Internet protocol version 6 (IPv6) show commands", }; -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 or IPv6 routes. 
All @@ -560,37 +604,37 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { * To add a route to a particular FIB table (VRF), use: * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_route_command, static) = { .path = "ip route", - .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]", + .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table " + "<table-id>] via [next-hop-address] [next-hop-interface] " + "[next-hop-table <value>] [weight <value>] [preference " + "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] " + "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] " + "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 " + "<interface>] [out-labels <value value value>]", .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 Tables. All * Tables must be explicitly added before that can be used. Creating a * table will add both unicast and multicast FIBs * ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_table_command, static) = { .path = "ip table", .short_help = "ip table [add|del] <table-id>", .function = vnet_ip4_table_cmd, }; -/* *INDENT-ON* */ -/* *INDENT-ON* */ /*? * This command is used to add or delete IPv4 Tables. All * Tables must be explicitly added before that can be used. 
Creating a * table will add both unicast and multicast FIBs * ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_table_command, static) = { .path = "ip6 table", .short_help = "ip6 table [add|del] <table-id>", @@ -638,7 +682,7 @@ ip_table_bind_cmd (vlib_main_t * vm, goto done; } - rv = ip_table_bind (fproto, sw_if_index, table_id, 0); + rv = ip_table_bind (fproto, sw_if_index, table_id); if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv) { @@ -695,14 +739,12 @@ ip6_table_bind_cmd (vlib_main_t * vm, * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { .path = "set interface ip table", .function = ip4_table_bind_cmd, .short_help = "set interface ip table <interface> <table-id>", }; -/* *INDENT-ON* */ /*? * Place the indicated interface into the supplied IPv6 FIB table (also known @@ -723,14 +765,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = { .path = "set interface ip6 table", .function = ip6_table_bind_cmd, .short_help = "set interface ip6 table <interface> <table-id>" }; -/* *INDENT-ON* */ clib_error_t * vnet_ip_mroute_cmd (vlib_main_t * vm, @@ -894,8 +934,8 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, mfib_table_entry_path_remove (fib_index, &pfx, MFIB_SOURCE_CLI, rpaths); else - mfib_table_entry_path_update (fib_index, - &pfx, MFIB_SOURCE_CLI, rpaths); + mfib_table_entry_path_update (fib_index, &pfx, MFIB_SOURCE_CLI, + MFIB_ENTRY_FLAG_NONE, rpaths); } if (FIB_PROTOCOL_IP4 == pfx.fp_proto) @@ -967,7 +1007,6 @@ done: * @cliexcmd{ip mroute add 232.1.1.1 Signal} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip_mroute_command, static) = { .path = "ip mroute", 
@@ -975,7 +1014,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) = .function = vnet_ip_mroute_cmd, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h index 48ba468d7c2..8083d974df6 100644 --- a/src/vnet/ip/lookup.h +++ b/src/vnet/ip/lookup.h @@ -162,23 +162,22 @@ typedef struct ip_lookup_main_t } ip_lookup_main_t; u8 *format_ip_flow_hash_config (u8 * s, va_list * args); - +uword unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args); always_inline void ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index, vlib_buffer_t * b) { - /* *INDENT-OFF* */ vnet_buffer (b)->ip.fib_index = vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]); vnet_buffer (b)->ip.fib_index = ((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? vnet_buffer (b)->ip.fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX]); - /* *INDENT-ON* */ } void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); +bool fib_prefix_validate (const fib_prefix_t *prefix); #endif /* included_ip_lookup_h */ /* diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index fb0cc221950..3c46549634a 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm, punt_main_t *pm = &punt_main; punt_client_t *c; - /* For now we only support UDP punt */ - if (protocol != IP_PROTOCOL_UDP) - return clib_error_return (0, - "only UDP protocol (%d) is supported, got %d", - IP_PROTOCOL_UDP, protocol); - if (port == (u16) ~ 0) - return clib_error_return (0, "UDP port number required"); + return clib_error_return (0, "Port number required"); + + u32 node_index; + switch (protocol) + { + case IP_PROTOCOL_UDP: + node_index = (af == AF_IP4 ? 
udp4_punt_socket_node.index : + udp6_punt_socket_node.index); + udp_register_dst_port (vm, port, node_index, af == AF_IP4); + break; + case IP_PROTOCOL_ICMP6: + if (af != AF_IP6) + return clib_error_return ( + 0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d", + IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol); + + node_index = icmp6_punt_socket_node.index; + icmp6_register_type (vm, port, node_index); + break; + default: + return clib_error_return ( + 0, "only UDP or ICMP6 protocol (%d) is supported, got %d", + IP_PROTOCOL_UDP, protocol); + } c = punt_client_l4_get (af, port); @@ -165,19 +182,14 @@ punt_socket_register_l4 (vlib_main_t * vm, punt_client_l4_db_add (af, port, c - pm->punt_client_pool); } - memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path)); + snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s", + client_pathname); c->caddr.sun_family = AF_UNIX; c->reg.type = PUNT_TYPE_L4; c->reg.punt.l4.port = port; c->reg.punt.l4.protocol = protocol; c->reg.punt.l4.af = af; - u32 node_index = (af == AF_IP4 ? 
- udp4_punt_socket_node.index : - udp6_punt_socket_node.index); - - udp_register_dst_port (vm, port, node_index, af == AF_IP4); - return (NULL); } @@ -197,7 +209,8 @@ punt_socket_register_ip_proto (vlib_main_t * vm, punt_client_ip_proto_db_add (af, proto, c - pm->punt_client_pool); } - memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path)); + snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s", + client_pathname); c->caddr.sun_family = AF_UNIX; c->reg.type = PUNT_TYPE_IP_PROTO; c->reg.punt.ip_proto.protocol = proto; @@ -227,7 +240,8 @@ punt_socket_register_exception (vlib_main_t * vm, punt_client_exception_db_add (reason, pc - pm->punt_client_pool); } - memcpy (pc->caddr.sun_path, client_pathname, sizeof (pc->caddr.sun_path)); + snprintf (pc->caddr.sun_path, sizeof (pc->caddr.sun_path), "%s", + client_pathname); pc->caddr.sun_family = AF_UNIX; pc->reg.type = PUNT_TYPE_EXCEPTION; pc->reg.punt.exception.reason = reason; @@ -369,6 +383,8 @@ punt_l4_add_del (vlib_main_t * vm, ip_address_family_t af, ip_protocol_t protocol, u16 port, bool is_add) { + int is_ip4 = af == AF_IP4; + /* For now we only support TCP and UDP punt */ if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP) return clib_error_return (0, @@ -378,19 +394,22 @@ punt_l4_add_del (vlib_main_t * vm, if (port == (u16) ~ 0) { if (protocol == IP_PROTOCOL_UDP) - udp_punt_unknown (vm, af == AF_IP4, is_add); + udp_punt_unknown (vm, is_ip4, is_add); else if (protocol == IP_PROTOCOL_TCP) - tcp_punt_unknown (vm, af == AF_IP4, is_add); + tcp_punt_unknown (vm, is_ip4, is_add); return 0; } else if (is_add) { + const vlib_node_registration_t *punt_node = + is_ip4 ? 
&udp4_punt_node : &udp6_punt_node; + if (protocol == IP_PROTOCOL_TCP) return clib_error_return (0, "punt TCP ports is not supported yet"); - udp_register_dst_port (vm, port, udp4_punt_node.index, af == AF_IP4); + udp_register_dst_port (vm, port, punt_node->index, is_ip4); return 0; } @@ -399,7 +418,7 @@ punt_l4_add_del (vlib_main_t * vm, if (protocol == IP_PROTOCOL_TCP) return clib_error_return (0, "punt TCP ports is not supported yet"); - udp_unregister_dst_port (vm, port, af == AF_IP4); + udp_unregister_dst_port (vm, port, is_ip4); return 0; } @@ -455,7 +474,6 @@ punt_cli (vlib_main_t * vm, unformat_input_t line_input, *input = &line_input; clib_error_t *error = NULL; bool is_add = true; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -467,7 +485,6 @@ punt_cli (vlib_main_t * vm, .type = PUNT_TYPE_L4, }; u32 port; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -533,13 +550,11 @@ done: * @cliexcmd{set punt udp del all} * @endparblock ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_command, static) = { .path = "set punt", .short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]", .function = punt_cli, }; -/* *INDENT-ON* */ static clib_error_t * punt_socket_register_cmd (vlib_main_t * vm, @@ -549,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm, unformat_input_t line_input, *input = &line_input; u8 *socket_name = 0; clib_error_t *error = NULL; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -560,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm, }, .type = PUNT_TYPE_L4, }; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -608,7 +621,6 @@ done: * @cliexcmd{punt socket register socket punt_l4_foo.sock} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_socket_register_command, static) = { .path = "punt socket register", @@ -616,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) = .short_help = "punt socket register 
[IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>", .is_mp_safe = 1, }; -/* *INDENT-ON* */ static clib_error_t * punt_socket_deregister_cmd (vlib_main_t * vm, @@ -625,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm, { unformat_input_t line_input, *input = &line_input; clib_error_t *error = NULL; - /* *INDENT-OFF* */ punt_reg_t pr = { .punt = { .l4 = { @@ -636,7 +646,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm, }, .type = PUNT_TYPE_L4, }; - /* *INDENT-ON* */ if (!unformat_user (input__, unformat_line_input, input)) return 0; @@ -677,7 +686,6 @@ done: * @cliexpar * @cliexcmd{punt socket register} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) = { .path = "punt socket deregister", @@ -685,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) = .short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]", .is_mp_safe = 1, }; -/* *INDENT-ON* */ void punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx) @@ -698,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx) { u32 pci, key; - /* *INDENT-OFF* */ hash_foreach(key, pci, pm->db.clients_by_l4_port, ({ cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx); })); - /* *INDENT-ON* */ break; } case PUNT_TYPE_IP_PROTO: { u32 pci, key; - /* *INDENT-OFF* */ hash_foreach(key, pci, pm->db.clients_by_ip_proto, ({ cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx); })); - /* *INDENT-ON* */ break; } case PUNT_TYPE_EXCEPTION: @@ -813,7 +816,6 @@ done: * @cliexpar * @cliexcmd{show punt socket ipv4} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = { .path = "show punt socket registrations", @@ -821,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) = .short_help = "show punt socket registrations [l4|exception]", .is_mp_safe = 1, }; -/* *INDENT-ON* */ clib_error_t * ip_punt_init (vlib_main_t * vm) diff --git a/src/vnet/ip/punt.h 
b/src/vnet/ip/punt.h index a2612d60f07..e8495caad61 100644 --- a/src/vnet/ip/punt.h +++ b/src/vnet/ip/punt.h @@ -20,7 +20,12 @@ #ifndef included_punt_h #define included_punt_h +#ifdef __linux__ #include <linux/un.h> +#elif __FreeBSD__ +#include <sys/un.h> +#define UNIX_PATH_MAX SUNPATHLEN +#endif /* __linux__ */ #include <stdbool.h> #include <vnet/ip/ip.h> @@ -239,6 +244,7 @@ extern vlib_node_registration_t udp4_punt_node; extern vlib_node_registration_t udp6_punt_node; extern vlib_node_registration_t udp4_punt_socket_node; extern vlib_node_registration_t udp6_punt_socket_node; +extern vlib_node_registration_t icmp6_punt_socket_node; extern vlib_node_registration_t ip4_proto_punt_socket_node; extern vlib_node_registration_t ip6_proto_punt_socket_node; extern vlib_node_registration_t punt_socket_rx_node; diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c index bcbf939f69d..20297af2e75 100644 --- a/src/vnet/ip/punt_api.c +++ b/src/vnet/ip/punt_api.c @@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp) char *p = vnet_punt_get_server_pathname (); - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY, ({ memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname)); })); - /* *INDENT-ON* */ } typedef struct punt_socket_send_ctx_t_ diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c index 7f9beef0ffe..6400e49c626 100644 --- a/src/vnet/ip/punt_node.c +++ b/src/vnet/ip/punt_node.c @@ -23,6 +23,7 @@ */ #include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> #include <vlib/vlib.h> #include <vnet/ip/punt.h> #include <vlib/unix/unix.h> @@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm, return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_punt_node) = { .name = "ip4-udp-punt", /* Takes a vector of packets. 
*/ @@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = { #undef _ }, }; -/* *INDENT-ON* */ typedef struct { @@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args) } always_inline uword -punt_socket_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - punt_type_t pt, ip_address_family_t af) +punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, punt_type_t pt, + ip_address_family_t af, ip_protocol_t protocol) { u32 *buffers = vlib_frame_vector_args (frame); u32 thread_index = vm->thread_index; @@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm, uword l; punt_packetdesc_t packetdesc; punt_client_t *c; - + u16 port = 0; b = vlib_get_buffer (vm, buffers[i]); if (PUNT_TYPE_L4 == pt) { - /* Reverse UDP Punt advance */ - udp_header_t *udp; - if (AF_IP4 == af) + if (protocol == IP_PROTOCOL_UDP) { - vlib_buffer_advance (b, -(sizeof (ip4_header_t) + - sizeof (udp_header_t))); - ip4_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); + /* Reverse UDP Punt advance */ + udp_header_t *udp; + if (AF_IP4 == af) + { + vlib_buffer_advance ( + b, -(sizeof (ip4_header_t) + sizeof (udp_header_t))); + ip4_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + else + { + vlib_buffer_advance ( + b, -(sizeof (ip6_header_t) + sizeof (udp_header_t))); + ip6_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + port = clib_net_to_host_u16 (udp->dst_port); } - else + else if (protocol == IP_PROTOCOL_ICMP6) { - vlib_buffer_advance (b, -(sizeof (ip6_header_t) + - sizeof (udp_header_t))); ip6_header_t *ip = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip + 1); + icmp46_header_t *icmp = ip6_next_header (ip); + port = icmp->type; } - /* * Find registerered client * If no registered client, drop packet and count */ - c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port)); + c = punt_client_l4_get (af, 
port); } else if (PUNT_TYPE_IP_PROTO == pt) { @@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm, iov->iov_len = sizeof (packetdesc); /** VLIB buffer chain -> Unix iovec(s). */ - vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); + vlib_buffer_advance (b, -ethernet_buffer_header_size (b)); vec_add2 (ptd->iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -396,6 +403,14 @@ error: return n_packets; } +always_inline uword +punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, punt_type_t pt, + ip_address_family_t af) +{ + return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP); +} + static uword udp4_punt_socket (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm, } static uword +icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *from_frame) +{ + return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6, + IP_PROTOCOL_ICMP6); +} + +static uword exception_punt_socket (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { @@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (udp4_punt_socket_node) = { .function = udp4_punt_socket, .name = "ip4-udp-punt-socket", @@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = { .n_errors = PUNT_N_ERROR, .error_strings = punt_error_strings, }; -/* *INDENT-ON* */ +VLIB_REGISTER_NODE (icmp6_punt_socket_node) = { + .function = icmp6_punt_socket, + .name = "ip6-icmp-punt-socket", + .format_trace = format_udp_punt_trace, + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; + typedef struct { @@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm, return total_count; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (punt_socket_rx_node) = { .function = 
punt_socket_rx, @@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) = }, .format_trace = format_punt_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c index d2069c0876c..bab7d479dcf 100644 --- a/src/vnet/ip/reass/ip4_full_reass.c +++ b/src/vnet/ip/reass/ip4_full_reass.c @@ -23,16 +23,21 @@ #include <vppinfra/vec.h> #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/ip/ip.api_enum.h> #include <vppinfra/fifo.h> #include <vppinfra/bihash_16_8.h> #include <vnet/ip/reass/ip4_full_reass.h> #include <stddef.h> #define MSEC_PER_SEC 1000 -#define IP4_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default +#define IP4_REASS_TIMEOUT_DEFAULT_MS 200 + +/* As there are only 1024 reass context per thread, either the DDOS attacks + * or fractions of real timeouts, would consume these contexts quickly and + * running out context space and unable to perform reassembly */ +#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default #define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024 -#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 +#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 #define IP4_REASS_HT_LOAD_FACTOR (0.75) #define IP4_REASS_DEBUG_BUFFERS 0 @@ -68,21 +73,19 @@ typedef enum typedef struct { - union + struct { - struct - { - u32 xx_id; - ip4_address_t src; - ip4_address_t dst; - u16 frag_id; - u8 proto; - u8 unused; - }; - u64 as_u64[2]; + u16 frag_id; + u8 proto; + u8 unused; + u32 fib_index; + ip4_address_t src; + ip4_address_t dst; }; } ip4_full_reass_key_t; +STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16); + typedef union { struct @@ -155,6 +158,8 @@ typedef struct ip4_full_reass_t *pool; u32 reass_n; u32 id_counter; + // for pacing the main thread timeouts + u32 last_id; clib_spinlock_t lock; } ip4_full_reass_per_thread_t; @@ -177,17 +182,19 @@ typedef struct // convenience 
vlib_main_t *vlib_main; - // node index of ip4-drop node - u32 ip4_drop_idx; u32 ip4_full_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; + u32 fq_local_index; u32 fq_feature_index; u32 fq_custom_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + + // whether local fragmented packets are reassembled or not + int is_local_reass_enabled; } ip4_full_reass_main_t; extern ip4_full_reass_main_t ip4_full_reass_main; @@ -219,6 +226,7 @@ typedef enum RANGE_OVERLAP, FINALIZE, HANDOFF, + PASSTHROUGH, } ip4_full_reass_trace_operation_e; typedef struct @@ -329,6 +337,9 @@ format_ip4_full_reass_trace (u8 * s, va_list * args) format (s, "handoff from thread #%u to thread #%u", t->thread_id, t->thread_id_to); break; + case PASSTHROUGH: + s = format (s, "passthrough - not a fragment"); + break; } return s; } @@ -404,13 +415,16 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm, ip4_full_reass_per_thread_t * rt, ip4_full_reass_t * reass) { - clib_bihash_kv_16_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; + clib_bihash_kv_16_8_t kv = {}; + clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key)); clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); return ip4_full_reass_free_ctx (rt, reass); } +/* n_left_to_next, and to_next are taken as input params, as this function + * could be called from a graphnode, where its managing local copy of these + * variables, and ignoring those and still trying to enqueue the buffers + * with local variables would cause either buffer leak or corruption */ always_inline void ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_full_reass_t *reass) @@ -419,58 +433,103 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; u32 *to_free = NULL; + while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); range_vnb = vnet_buffer (range_b); - u32 bi = range_bi; - 
while (~0 != bi) + + if (~0 != range_bi) { - vec_add1 (to_free, bi); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) - { - bi = b->next_buffer; - b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; - } - else - { - bi = ~0; - } + vec_add1 (to_free, range_bi); } + range_bi = range_vnb->ip.reass.next_range_bi; } + /* send to next_error_index */ - if (~0 != reass->error_next_index) + if (~0 != reass->error_next_index && + reass->error_next_index < node->n_next_nodes) { - u32 n_left_to_next, *to_next, next_index; + u32 n_free = vec_len (to_free); + + /* record number of packets sent to custom app */ + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TO_CUSTOM_APP, n_free); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + for (u32 i = 0; i < n_free; i++) + { + vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + ip4_full_reass_add_trace (vm, node, reass, to_free[i], + RANGE_DISCARD, 0, ~0); + } - next_index = reass->error_next_index; - u32 bi = ~0; + vlib_buffer_enqueue_to_single_next (vm, node, to_free, + reass->error_next_index, n_free); + } + else + { + vlib_buffer_free (vm, to_free, vec_len (to_free)); + } + vec_free (to_free); +} - while (vec_len (to_free) > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); +always_inline void +sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip4_full_reass_t *reass, + u32 *bi0) +{ + u32 range_bi = reass->first_bi; + vlib_buffer_t *range_b; + vnet_buffer_opaque_t *range_vnb; - while (vec_len (to_free) > 0 && n_left_to_next > 0) + while (~0 != range_bi) + { + range_b = vlib_get_buffer (vm, range_bi); + range_vnb = vnet_buffer (range_b); + u32 bi = range_bi; + if (~0 != bi) + { + if (bi == *bi0) + *bi0 = ~0; + if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT) { - bi = vec_pop (to_free); - - if (~0 != bi) + u32 _bi = bi; + vlib_buffer_t *_b = vlib_get_buffer (vm, _bi); + while (_b->flags & 
VLIB_BUFFER_NEXT_PRESENT) { - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; + if (_b->next_buffer != range_vnb->ip.reass.next_range_bi) + { + _bi = _b->next_buffer; + _b = vlib_get_buffer (vm, _bi); + } + else + { + _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + break; + } } } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + range_bi = range_vnb->ip.reass.next_range_bi; } } - else + if (*bi0 != ~0) { - vlib_buffer_free (vm, to_free, vec_len (to_free)); + vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); + vnet_buffer_opaque_t *fvnb = vnet_buffer (fb); + if (~0 != reass->first_bi) + { + fvnb->ip.reass.next_range_bi = reass->first_bi; + reass->first_bi = *bi0; + } + else + { + reass->first_bi = *bi0; + fvnb->ip.reass.next_range_bi = ~0; + } + *bi0 = ~0; } - vec_free (to_free); } always_inline void @@ -484,10 +543,10 @@ ip4_full_reass_init (ip4_full_reass_t * reass) } always_inline ip4_full_reass_t * -ip4_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, - ip4_full_reass_main_t * rm, - ip4_full_reass_per_thread_t * rt, - ip4_full_reass_kv_t * kv, u8 * do_handoff) +ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node, + ip4_full_reass_main_t *rm, + ip4_full_reass_per_thread_t *rt, + ip4_full_reass_kv_t *kv, u8 *do_handoff) { ip4_full_reass_t *reass; f64 now; @@ -510,6 +569,8 @@ again: if (now > reass->last_heard + rm->timeout) { + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TIMEOUT, 1); ip4_full_reass_drop_all (vm, node, reass); ip4_full_reass_free (rm, rt, reass); reass = NULL; @@ -538,8 +599,7 @@ again: ++rt->reass_n; } - reass->key.as_u64[0] = kv->kv.key[0]; - reass->key.as_u64[1] = kv->kv.key[1]; + clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key)); kv->v.reass_index = (reass - rt->pool); kv->v.memory_owner_thread_index = vm->thread_index; reass->last_heard = now; @@ -568,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, 
vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; - u32 buf_cnt = 0; do { u32 tmp_bi = sub_chain_bi; @@ -605,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { - ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) @@ -755,6 +813,16 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, *next0 = reass->next_index; } vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; + + /* Keep track of number of successfully reassembled packets and number of + * fragments reassembled */ + vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS, + 1); + + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED, + reass->fragments_n); + *error0 = IP4_ERROR_NONE; ip4_full_reass_free (rm, rt, reass); reass = NULL; @@ -1090,199 +1158,216 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, } always_inline uword -ip4_full_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, ip4_full_reass_node_type_t type) +ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, ip4_full_reass_node_type_t type, + bool is_local) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, *to_next, next_index; + u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE]; ip4_full_reass_main_t *rm = &ip4_full_reass_main; ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u16 nexts[VLIB_FRAME_SIZE]; + clib_spinlock_lock (&rt->lock); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) + n_left = frame->n_vectors; + while (n_left > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - 
u32 next0; - u32 error0 = IP4_ERROR_NONE; + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 error0 = IP4_ERROR_NONE; - bi0 = from[0]; - b0 = vlib_get_buffer (vm, bi0); + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = vlib_buffer_get_current (b0); - if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) + ip4_header_t *ip0 = vlib_buffer_get_current (b0); + if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) + { + // this is a whole packet - no fragmentation + if (CUSTOM != type) { - // this is a whole packet - no fragmentation - if (CUSTOM != type) - { - next0 = IP4_FULL_REASS_NEXT_INPUT; - } - else - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } - goto packet_enqueue; + next0 = IP4_FULL_REASS_NEXT_INPUT; } - const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); - const u32 fragment_length = - clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); - const u32 fragment_last = fragment_first + fragment_length - 1; - if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791 + else { - next0 = IP4_FULL_REASS_NEXT_DROP; - error0 = IP4_ERROR_REASS_MALFORMED_PACKET; - goto packet_enqueue; + next0 = vnet_buffer (b0)->ip.reass.next_index; } - ip4_full_reass_kv_t kv; - u8 do_handoff = 0; - - kv.k.as_u64[0] = - (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]) | - (u64) ip0->src_address.as_u32 << 32; - kv.k.as_u64[1] = - (u64) ip0->dst_address. 
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; - - ip4_full_reass_t *reass = - ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, - &do_handoff); - - if (reass) + ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0); + goto packet_enqueue; + } + + if (is_local && !rm->is_local_reass_enabled) + { + next0 = IP4_FULL_REASS_NEXT_DROP; + goto packet_enqueue; + } + + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); + const u32 fragment_length = + clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); + const u32 fragment_last = fragment_first + fragment_length - 1; + + /* Keep track of received fragments */ + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_FRAGMENTS_RCVD, 1); + + if (fragment_first > fragment_last || + fragment_first + fragment_length > UINT16_MAX - 20 || + (fragment_length < 8 && // 8 is minimum frag length per RFC 791 + ip4_get_fragment_more (ip0))) + { + next0 = IP4_FULL_REASS_NEXT_DROP; + error0 = IP4_ERROR_REASS_MALFORMED_PACKET; + goto packet_enqueue; + } + + u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ? 
+ vec_elt (ip4_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) : + vnet_buffer (b0)->sw_if_index[VLIB_TX]; + + ip4_full_reass_kv_t kv = { .k.fib_index = fib_index, + .k.src.as_u32 = ip0->src_address.as_u32, + .k.dst.as_u32 = ip0->dst_address.as_u32, + .k.frag_id = ip0->fragment_id, + .k.proto = ip0->protocol + + }; + u8 do_handoff = 0; + + ip4_full_reass_t *reass = + ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff); + + if (reass) + { + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); + if (0 == fragment_first) { - const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); - if (0 == fragment_first) - { - reass->sendout_thread_index = vm->thread_index; - } + reass->sendout_thread_index = vm->thread_index; } + } - if (PREDICT_FALSE (do_handoff)) + if (PREDICT_FALSE (do_handoff)) + { + next0 = IP4_FULL_REASS_NEXT_HANDOFF; + vnet_buffer (b0)->ip.reass.owner_thread_index = + kv.v.memory_owner_thread_index; + } + else if (reass) + { + u32 handoff_thread_idx; + u32 counter = ~0; + switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0, + &error0, CUSTOM == type, + &handoff_thread_idx)) { + case IP4_REASS_RC_OK: + /* nothing to do here */ + break; + case IP4_REASS_RC_HANDOFF: next0 = IP4_FULL_REASS_NEXT_HANDOFF; + b0 = vlib_get_buffer (vm, bi0); vnet_buffer (b0)->ip.reass.owner_thread_index = - kv.v.memory_owner_thread_index; - } - else if (reass) - { - u32 handoff_thread_idx; - switch (ip4_full_reass_update - (vm, node, rm, rt, reass, &bi0, &next0, - &error0, CUSTOM == type, &handoff_thread_idx)) - { - case IP4_REASS_RC_OK: - /* nothing to do here */ - break; - case IP4_REASS_RC_HANDOFF: - next0 = IP4_FULL_REASS_NEXT_HANDOFF; - b0 = vlib_get_buffer (vm, bi0); - vnet_buffer (b0)->ip.reass.owner_thread_index = - handoff_thread_idx; - break; - case IP4_REASS_RC_TOO_MANY_FRAGMENTS: - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG, - 1); - 
ip4_full_reass_drop_all (vm, node, reass); - ip4_full_reass_free (rm, rt, reass); - goto next_packet; - break; - case IP4_REASS_RC_NO_BUF: - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_NO_BUF, 1); - ip4_full_reass_drop_all (vm, node, reass); - ip4_full_reass_free (rm, rt, reass); - goto next_packet; - break; - case IP4_REASS_RC_INTERNAL_ERROR: - /* drop everything and start with a clean slate */ - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_INTERNAL_ERROR, - 1); - ip4_full_reass_drop_all (vm, node, reass); - ip4_full_reass_free (rm, rt, reass); - goto next_packet; - break; - } + handoff_thread_idx; + break; + case IP4_REASS_RC_TOO_MANY_FRAGMENTS: + counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; + break; + case IP4_REASS_RC_NO_BUF: + counter = IP4_ERROR_REASS_NO_BUF; + break; + case IP4_REASS_RC_INTERNAL_ERROR: + counter = IP4_ERROR_REASS_INTERNAL_ERROR; + /* Sanitization is needed in internal error cases only, as + * the incoming packet is already dropped in other cases, + * also adding bi0 back to the reassembly list, fixes the + * leaking of buffers during internal errors. 
+ * + * Also it doesnt make sense to send these buffers custom + * app, these fragments are with internal errors */ + sanitize_reass_buffers_add_missing (vm, reass, &bi0); + reass->error_next_index = ~0; + break; } - else + + if (~0 != counter) { - next0 = IP4_FULL_REASS_NEXT_DROP; - error0 = IP4_ERROR_REASS_LIMIT_REACHED; + vlib_node_increment_counter (vm, node->node_index, counter, 1); + ip4_full_reass_drop_all (vm, node, reass); + ip4_full_reass_free (rm, rt, reass); + goto next_packet; } + } + else + { + next0 = IP4_FULL_REASS_NEXT_DROP; + error0 = IP4_ERROR_REASS_LIMIT_REACHED; + } + packet_enqueue: - packet_enqueue: - - if (bi0 != ~0) + if (bi0 != ~0) + { + /* bi0 might have been updated by reass_finalize, reload */ + b0 = vlib_get_buffer (vm, bi0); + if (IP4_ERROR_NONE != error0) { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; + b0->error = node->errors[error0]; + } - /* bi0 might have been updated by reass_finalize, reload */ - b0 = vlib_get_buffer (vm, bi0); - if (IP4_ERROR_NONE != error0) + if (next0 == IP4_FULL_REASS_NEXT_HANDOFF) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - b0->error = node->errors[error0]; + ip4_full_reass_add_trace ( + vm, node, NULL, bi0, HANDOFF, 0, + vnet_buffer (b0)->ip.reass.owner_thread_index); } + } + else if (FEATURE == type && IP4_ERROR_NONE == error0) + { + vnet_feature_next (&next0, b0); + } - if (next0 == IP4_FULL_REASS_NEXT_HANDOFF) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ip4_full_reass_add_trace ( - vm, node, NULL, bi0, HANDOFF, 0, - vnet_buffer (b0)->ip.reass.owner_thread_index); - } - } - else if (FEATURE == type && IP4_ERROR_NONE == error0) - { - vnet_feature_next (&next0, b0); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next); + /* Increment the counter to-custom-app also as this fragment is + * also going to application */ + if (CUSTOM == type) + { + 
vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TO_CUSTOM_APP, 1); } - next_packet: - from += 1; - n_left_from -= 1; + to_next[n_next] = bi0; + nexts[n_next] = next0; + n_next++; + IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next); } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next_packet: + from += 1; + n_left -= 1; } clib_spinlock_unlock (&rt->lock); + + vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next); return frame->n_vectors; } -static char *ip4_full_reass_error_strings[] = { -#define _(sym, string) string, - foreach_ip4_error -#undef _ -}; - VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_full_reass_inline (vm, node, frame, NORMAL); + return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */); } VLIB_REGISTER_NODE (ip4_full_reass_node) = { .name = "ip4-full-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1293,19 +1378,42 @@ VLIB_REGISTER_NODE (ip4_full_reass_node) = { }, }; +VLIB_NODE_FN (ip4_local_full_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */); +} + +VLIB_REGISTER_NODE (ip4_local_full_reass_node) = { + .name = "ip4-local-full-reassembly", + .vector_size = sizeof (u32), + .format_trace = format_ip4_full_reass_trace, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, + .n_next_nodes = IP4_FULL_REASS_N_NEXT, + .next_nodes = + { + [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input", + [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop", + [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff", + + }, +}; + VLIB_NODE_FN 
(ip4_full_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_full_reass_inline (vm, node, frame, FEATURE); + return ip4_full_reass_inline (vm, node, frame, FEATURE, + false /* is_local */); } VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = { .name = "ip4-full-reassembly-feature", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1316,26 +1424,26 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = { }; VNET_FEATURE_INIT (ip4_full_reass_feature, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - .runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, + .arc_name = "ip4-unicast", + .node_name = "ip4-full-reassembly-feature", + .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature", + "ip4-sv-reassembly-feature"), + .runs_after = 0, }; VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_full_reass_inline (vm, node, frame, CUSTOM); + return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */); } VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = { .name = "ip4-full-reassembly-custom", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1345,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = { }, }; -VNET_FEATURE_INIT (ip4_full_reass_custom, static) = { - .arc_name = "ip4-unicast", - .node_name = "ip4-full-reassembly-feature", - 
.runs_before = VNET_FEATURES ("ip4-lookup", - "ipsec4-input-feature"), - .runs_after = 0, -}; - - #ifndef CLIB_MARCH_VARIANT uword ip4_full_reass_custom_register_next_node (uword node_index) @@ -1369,7 +1468,9 @@ ip4_full_reass_get_nbuckets () u32 nbuckets; u8 i; - nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR); + /* need more mem with more workers */ + nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) / + IP4_REASS_HT_LOAD_FACTOR); for (i = 0; i < 31; i++) if ((1 << i) >= nbuckets) @@ -1495,17 +1596,17 @@ ip4_full_reass_init_function (vlib_main_t * vm) nbuckets = ip4_full_reass_get_nbuckets (); clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024); - node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop"); - ASSERT (node); - rm->ip4_drop_idx = node->index; - rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0); + rm->fq_local_index = + vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0); rm->fq_custom_index = vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0); rm->feature_use_refcount_per_intf = NULL; + rm->is_local_reass_enabled = 1; + return error; } @@ -1547,6 +1648,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, uword thread_index = 0; int index; const uword nthreads = vlib_num_workers () + 1; + for (thread_index = 0; thread_index < nthreads; ++thread_index) { ip4_full_reass_per_thread_t *rt = @@ -1554,13 +1656,39 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + + /* Pace the number of timeouts handled per thread,to avoid barrier + * sync issues in real world scenarios */ + 
+ u32 beg = rt->last_id; + /* to ensure we walk at least once per sec per context */ + u32 end = + beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT * + IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC + + 1); + if (end > vec_len (rt->pool)) + { + end = vec_len (rt->pool); + rt->last_id = 0; + } + else + { + rt->last_id = end; + } + + pool_foreach_stepping_index (index, beg, end, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } + + if (vec_len (pool_indexes_to_free)) + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TIMEOUT, + vec_len (pool_indexes_to_free)); int *i; vec_foreach (i, pool_indexes_to_free) { @@ -1575,7 +1703,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, vec_free (pool_indexes_to_free); if (event_data) { - _vec_len (event_data) = 0; + vec_set_len (event_data, 0); } } @@ -1583,13 +1711,12 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, } VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = { - .function = ip4_full_reass_walk_expired, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ip4-full-reassembly-expire-walk", - .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, - + .function = ip4_full_reass_walk_expired, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ip4-full-reassembly-expire-walk", + .format_trace = format_ip4_full_reass_trace, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, }; static u8 * @@ -1597,9 +1724,8 @@ format_ip4_full_reass_key (u8 * s, va_list * args) { ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *); s = - format (s, - "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u", - key->xx_id, format_ip4_address, &key->src, format_ip4_address, + format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u", + 
key->fib_index, format_ip4_address, &key->src, format_ip4_address, &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto); return s; } @@ -1750,10 +1876,10 @@ format_ip4_full_reass_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip4_full_reass_handoff_node_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - ip4_full_reass_node_type_t type) +ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, + ip4_full_reass_node_type_t type, + bool is_local) { ip4_full_reass_main_t *rm = &ip4_full_reass_main; @@ -1772,7 +1898,14 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm, switch (type) { case NORMAL: - fq_index = rm->fq_index; + if (is_local) + { + fq_index = rm->fq_local_index; + } + else + { + fq_index = rm->fq_index; + } break; case FEATURE: fq_index = rm->fq_feature_index; @@ -1782,7 +1915,6 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm, break; default: clib_warning ("Unexpected `type' (%d)!", type); - ASSERT (0); } while (n_left_from > 0) @@ -1816,7 +1948,8 @@ VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL); + return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL, + false /* is_local */); } @@ -1834,16 +1967,36 @@ VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = { }, }; +VLIB_NODE_FN (ip4_local_full_reass_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL, + true /* is_local */); +} + +VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = { + .name = "ip4-local-full-reassembly-handoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings), + .error_strings = ip4_full_reass_handoff_error_strings, + .format_trace = format_ip4_full_reass_handoff_trace, + + 
.n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE); + return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE, + false /* is_local */); } - VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = { .name = "ip4-full-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1863,10 +2016,10 @@ VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm, node, vlib_frame_t * frame) { - return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM); + return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM, + false /* is_local */); } - VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = { .name = "ip4-full-reass-custom-hoff", .vector_size = sizeof (u32), @@ -1906,8 +2059,28 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) "ip4-full-reassembly-feature", sw_if_index, 0, 0, 0); } - return -1; + return 0; } + +void +ip4_local_full_reass_enable_disable (int enable) +{ + if (enable) + { + ip4_full_reass_main.is_local_reass_enabled = 1; + } + else + { + ip4_full_reass_main.is_local_reass_enabled = 0; + } +} + +int +ip4_local_full_reass_enabled () +{ + return ip4_full_reass_main.is_local_reass_enabled; +} + #endif /* diff --git a/src/vnet/ip/reass/ip4_full_reass.h b/src/vnet/ip/reass/ip4_full_reass.h index 000c80c5906..5df8107ca48 100644 --- a/src/vnet/ip/reass/ip4_full_reass.h +++ b/src/vnet/ip/reass/ip4_full_reass.h @@ -47,6 +47,9 @@ int ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); uword ip4_full_reass_custom_register_next_node (uword node_index); + +void ip4_local_full_reass_enable_disable (int enable); +int ip4_local_full_reass_enabled (); #endif /* __included_ip4_full_reass_h__ */ /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c 
index cd5e19b65d3..7c3c2fff217 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -48,7 +48,7 @@ typedef struct { struct { - u32 xx_id; + u32 fib_index; ip4_address_t src; ip4_address_t dst; u16 frag_id; @@ -150,6 +150,7 @@ typedef struct /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; @@ -189,6 +190,7 @@ typedef struct u8 ip_proto; u16 l4_src_port; u16 l4_dst_port; + int l4_layer_truncated; } ip4_sv_reass_trace_t; extern vlib_node_registration_t ip4_sv_reass_node; @@ -225,6 +227,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) s = format (s, "[not-fragmented]"); break; } + if (t->l4_layer_truncated) + { + s = format (s, " [l4-layer-truncated]"); + } return s; } @@ -232,7 +238,8 @@ static void ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, ip4_sv_reass_t *reass, u32 bi, ip4_sv_reass_trace_operation_e action, u32 ip_proto, - u16 l4_src_port, u16 l4_dst_port) + u16 l4_src_port, u16 l4_dst_port, + int l4_layer_truncated) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (pool_is_free_index @@ -253,6 +260,7 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, t->ip_proto = ip_proto; t->l4_src_port = l4_src_port; t->l4_dst_port = l4_dst_port; + t->l4_layer_truncated = l4_layer_truncated; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t); @@ -314,6 +322,8 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, ip4_sv_reass_t *reass = NULL; f64 now = vlib_time_now (vm); +again: + if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv)) { if (vm->thread_index != kv->v.thread_index) @@ -368,10 +378,14 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, kv->v.thread_index = vm->thread_index; reass->last_heard = now; - if (clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 
1)) + int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2); + if (rv) { ip4_sv_reass_free (vm, rm, rt, reass); reass = NULL; + // if other worker created a context already work with the other copy + if (-2 == rv) + goto again; } return reass; @@ -407,9 +421,10 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH, - reass->ip_proto, reass->l4_src_port, - reass->l4_dst_port); + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FINISH, reass->ip_proto, + reass->l4_src_port, reass->l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } } vec_add1 (reass->cached_buffers, bi0); @@ -417,8 +432,9 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, { if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FRAGMENT_CACHE, - ~0, ~0, ~0); + ip4_sv_reass_add_trace ( + vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } if (vec_len (reass->cached_buffers) > rm->max_reass_len) { @@ -428,15 +444,33 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, return rc; } +always_inline int +l4_layer_truncated (ip4_header_t *ip) +{ + static const int l4_layer_length[256] = { + [IP_PROTOCOL_TCP] = sizeof (tcp_header_t), + [IP_PROTOCOL_UDP] = sizeof (udp_header_t), + [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t), + }; + + return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] > + (u8 *) ip + clib_net_to_host_u16 (ip->length)); +} + always_inline uword -ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature, - bool is_output_feature, bool is_custom) +ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool is_output_feature, bool 
is_custom, + bool with_custom_context) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, *to_next, next_index; + u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u32 *context; + if (with_custom_context) + context = vlib_frame_aux_args (frame); + clib_spinlock_lock (&rt->lock); n_left_from = frame->n_vectors; @@ -482,6 +516,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, (is_output_feature ? 1 : 0) * vnet_buffer (b1)-> ip.save_rewrite_length); + if (PREDICT_FALSE (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0)) || (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1))) @@ -506,29 +541,40 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (IP_PROTOCOL_TCP == ip0->protocol) + if (l4_layer_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_src_port = 0; + vnet_buffer (b0)->ip.reass.l4_dst_port = 0; } - else if (IP_PROTOCOL_ICMP == ip0->protocol) + else { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + if (IP_PROTOCOL_TCP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip0 + 1))->flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip0 + 1))->ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip0 + 
1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } + vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); + vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); } - vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); - vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 2) - bufs], - REASS_PASSTHROUGH, - vnet_buffer (b0)->ip.reass.ip_proto, - vnet_buffer (b0)->ip.reass.l4_src_port, - vnet_buffer (b0)->ip.reass.l4_dst_port); + ip4_sv_reass_add_trace ( + vm, node, NULL, from[(b - 2) - bufs], REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } if (is_feature) { @@ -541,35 +587,48 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol; - if (IP_PROTOCOL_TCP == ip1->protocol) + if (l4_layer_truncated (ip1)) { - vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip1 + 1))->flags; - vnet_buffer (b1)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip1 + 1))->ack_number; - vnet_buffer (b1)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip1 + 1))->seq_number; + vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b1)->ip.reass.l4_src_port = 0; + vnet_buffer (b1)->ip.reass.l4_dst_port = 0; } - else if (IP_PROTOCOL_ICMP == ip1->protocol) + else { - vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip1 + 1))->type; + vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0; + if (IP_PROTOCOL_TCP == ip1->protocol) + { + vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip1 
+ 1))->flags; + vnet_buffer (b1)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip1 + 1))->ack_number; + vnet_buffer (b1)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip1 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip1->protocol) + { + vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip1 + 1))->type; + } + vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1); + vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0); } - vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1); - vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0); if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { - ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs], - REASS_PASSTHROUGH, - vnet_buffer (b1)->ip.reass.ip_proto, - vnet_buffer (b1)->ip.reass.l4_src_port, - vnet_buffer (b1)->ip.reass.l4_dst_port); + ip4_sv_reass_add_trace ( + vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH, + vnet_buffer (b1)->ip.reass.ip_proto, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port, + vnet_buffer (b1)->ip.reass.l4_layer_truncated); } n_left_from -= 2; next[0] = next0; next[1] = next1; next += 2; + if (with_custom_context) + context += 2; } while (n_left_from > 0) @@ -608,34 +667,45 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (IP_PROTOCOL_TCP == ip0->protocol) + if (l4_layer_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; } - else if (IP_PROTOCOL_ICMP == ip0->protocol) + else { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - 
((icmp46_header_t *) (ip0 + 1))->type; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + if (IP_PROTOCOL_TCP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip0 + 1))->flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip0 + 1))->ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } + vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); + vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); } - vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); - vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs], - REASS_PASSTHROUGH, - vnet_buffer (b0)->ip.reass.ip_proto, - vnet_buffer (b0)->ip.reass.l4_src_port, - vnet_buffer (b0)->ip.reass.l4_dst_port); + ip4_sv_reass_add_trace ( + vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } n_left_from -= 1; next[0] = next0; next += 1; + if (with_custom_context) + context += 1; } vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, @@ -649,7 +719,11 @@ slow_path: while (n_left_from > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + if (with_custom_context) + vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, + to_next_aux, n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { @@ -657,6 +731,7 @@ slow_path: vlib_buffer_t *b0; u32 next0; u32 error0 = IP4_ERROR_NONE; + u8 forward_context 
= 0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); @@ -679,29 +754,42 @@ slow_path: } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - if (IP_PROTOCOL_TCP == ip0->protocol) + if (l4_layer_truncated (ip0)) { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((tcp_header_t *) (ip0 + 1))->flags; - vnet_buffer (b0)->ip.reass.tcp_ack_number = - ((tcp_header_t *) (ip0 + 1))->ack_number; - vnet_buffer (b0)->ip.reass.tcp_seq_number = - ((tcp_header_t *) (ip0 + 1))->seq_number; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1; + vnet_buffer (b0)->ip.reass.l4_src_port = 0; + vnet_buffer (b0)->ip.reass.l4_dst_port = 0; } - else if (IP_PROTOCOL_ICMP == ip0->protocol) + else { - vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = - ((icmp46_header_t *) (ip0 + 1))->type; + vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0; + if (IP_PROTOCOL_TCP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip0 + 1))->flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip0 + 1))->ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } + vnet_buffer (b0)->ip.reass.l4_src_port = + ip4_get_port (ip0, 1); + vnet_buffer (b0)->ip.reass.l4_dst_port = + ip4_get_port (ip0, 0); } - vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); - vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip4_sv_reass_add_trace ( vm, node, NULL, bi0, REASS_PASSTHROUGH, vnet_buffer (b0)->ip.reass.ip_proto, vnet_buffer (b0)->ip.reass.l4_src_port, - vnet_buffer (b0)->ip.reass.l4_dst_port); + vnet_buffer (b0)->ip.reass.l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } goto packet_enqueue; } @@ 
-719,13 +807,17 @@ slow_path: ip4_sv_reass_kv_t kv; u8 do_handoff = 0; - kv.k.as_u64[0] = - (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]) | - (u64) ip0->src_address.as_u32 << 32; - kv.k.as_u64[1] = - (u64) ip0->dst_address. - as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; + if (with_custom_context) + kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32 + << 32; + else + kv.k.as_u64[0] = + (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) | + (u64) ip0->src_address.as_u32 << 32; + kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 | + (u64) ip0->fragment_id << 32 | + (u64) ip0->protocol << 48; ip4_sv_reass_t *reass = ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff); @@ -735,6 +827,8 @@ slow_path: next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; + if (with_custom_context) + forward_context = 1; goto packet_enqueue; } @@ -771,31 +865,32 @@ slow_path: { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); + reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } goto packet_enqueue; } ip4_sv_reass_rc_t rc = ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0); + u32 counter = ~0; switch (rc) { case IP4_SV_REASS_RC_OK: /* nothing to do here */ break; case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS: - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG, - 1); - ip4_sv_reass_free (vm, rm, rt, reass); - goto next_packet; + counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; break; case IP4_SV_REASS_RC_UNSUPP_IP_PROTO: - vlib_node_increment_counter (vm, node->node_index, - IP4_ERROR_REASS_UNSUPP_IP_PROT, 1); + counter = IP4_ERROR_REASS_UNSUPP_IP_PROT; + break; + } + if (~0 != counter) + { + vlib_node_increment_counter (vm, 
node->node_index, counter, 1); ip4_sv_reass_free (vm, rm, rt, reass); goto next_packet; - break; } if (reass->is_complete) { @@ -843,13 +938,15 @@ slow_path: { ip4_sv_reass_add_trace ( vm, node, reass, bi0, REASS_FRAGMENT_FORWARD, - reass->ip_proto, reass->l4_src_port, reass->l4_dst_port); + reass->ip_proto, reass->l4_src_port, reass->l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_layer_truncated); } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } - _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now + vec_set_len (reass->cached_buffers, + 0); // buffers are owned by frame now } goto next_packet; @@ -862,13 +959,26 @@ slow_path: b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); + if (with_custom_context && forward_context) + { + if (to_next_aux) + { + to_next_aux[0] = *context; + to_next_aux += 1; + } + vlib_validate_buffer_enqueue_with_aux_x1 ( + vm, node, next_index, to_next, to_next_aux, n_left_to_next, + bi0, *context, next0); + } + else + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); next_packet: from += 1; n_left_from -= 1; + if (with_custom_context) + context += 1; } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -879,28 +989,21 @@ done: return frame->n_vectors; } -static char *ip4_sv_reass_error_strings[] = { -#define _(sym, string) string, - foreach_ip4_error -#undef _ -}; - VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , - false /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + false /* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE 
(ip4_sv_reass_node) = { .name = "ip4-sv-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip4_sv_reass_trace, - .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), - .error_strings = ip4_sv_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -910,24 +1013,22 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = { }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , - false /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, true /* is_feature */, false /* is_output_feature */, + false /* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { .name = "ip4-sv-reassembly-feature", .vector_size = sizeof (u32), .format_trace = format_ip4_sv_reass_trace, - .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), - .error_strings = ip4_sv_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -936,34 +1037,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = { [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_sv_reass_feature) = { .arc_name = "ip4-unicast", .node_name = "ip4-sv-reassembly-feature", .runs_before = VNET_FEATURES ("ip4-lookup"), .runs_after = 0, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , - true /* is_output_feature */ , - false /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, true /* is_feature */, true /* is_output_feature */, + false 
/* is_custom */, false /* with_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { .name = "ip4-sv-reassembly-output-feature", .vector_size = sizeof (u32), .format_trace = format_ip4_sv_reass_trace, - .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), - .error_strings = ip4_sv_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -972,24 +1069,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { .arc_name = "ip4-output", .node_name = "ip4-sv-reassembly-output-feature", .runs_before = 0, .runs_after = 0, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { .name = "ip4-sv-reassembly-custom-next", .vector_size = sizeof (u32), .format_trace = format_ip4_sv_reass_trace, - .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), - .error_strings = ip4_sv_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -999,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , - false /* is_output_feature */ , - true /* is_custom */ ); + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + true /* is_custom */, false /* with_custom_context */); +} + +VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = { + .name = "ip4-sv-reassembly-custom-context", + .vector_size = sizeof (u32), + .aux_size = sizeof(u32), + .format_trace = format_ip4_sv_reass_trace, + .n_errors = IP4_N_ERROR, 
+ .error_counters = ip4_error_counters, + .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", + [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff", + + }, +}; + +VLIB_NODE_FN (ip4_sv_reass_custom_context_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_inline ( + vm, node, frame, false /* is_feature */, false /* is_output_feature */, + true /* is_custom */, true /* with_custom_context */); } #ifndef CLIB_MARCH_VARIANT @@ -1152,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); + rm->fq_custom_context_index = + vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0); rm->feature_use_refcount_per_intf = NULL; rm->output_feature_use_refcount_per_intf = NULL; @@ -1204,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - /* *INDENT-OFF* */ pool_foreach_index (index, rt->pool) { reass = pool_elt_at_index (rt->pool, index); if (now > reass->last_heard + rm->timeout) @@ -1212,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, vec_add1 (pool_indexes_to_free, index); } } - /* *INDENT-ON* */ int *i; - /* *INDENT-OFF* */ vec_foreach (i, pool_indexes_to_free) { ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); ip4_sv_reass_free (vm, rm, rt, reass); } - /* *INDENT-ON* */ clib_spinlock_unlock (&rt->lock); } @@ -1228,33 +1343,29 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm, vec_free (pool_indexes_to_free); if (event_data) { - _vec_len (event_data) = 0; + vec_set_len (event_data, 0); } } return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = { - .function = ip4_sv_reass_walk_expired, - .type = 
VLIB_NODE_TYPE_PROCESS, - .name = "ip4-sv-reassembly-expire-walk", - .format_trace = format_ip4_sv_reass_trace, - .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), - .error_strings = ip4_sv_reass_error_strings, - + .function = ip4_sv_reass_walk_expired, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ip4-sv-reassembly-expire-walk", + .format_trace = format_ip4_sv_reass_trace, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, }; -/* *INDENT-ON* */ static u8 * format_ip4_sv_reass_key (u8 * s, va_list * args) { ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *); s = - format (s, - "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u", - key->xx_id, format_ip4_address, &key->src, format_ip4_address, + format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u", + key->fib_index, format_ip4_address, &key->src, format_ip4_address, &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto); return s; } @@ -1313,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm, clib_spinlock_lock (&rt->lock); if (details) { - /* *INDENT-OFF* */ pool_foreach (reass, rt->pool) { vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass); } - /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1341,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { .path = "show ip4-sv-reassembly", .short_help = "show ip4-sv-reassembly [details]", .function = show_ip4_reass, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT vnet_api_error_t @@ -1398,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip4_sv_reass_handoff_node_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool is_custom_context) { ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; 
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 n_enq, n_left_from, *from; + u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; u32 fq_index; from = vlib_frame_vector_args (frame); + if (is_custom_context) + context = vlib_frame_aux_args (frame); + n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index; + fq_index = (is_feature) ? rm->fq_feature_index : + (is_custom_context ? rm->fq_custom_context_index : + rm->fq_index); while (n_left_from > 0) { @@ -1435,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm, ti += 1; b += 1; } - n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from, - thread_indices, frame->n_vectors, 1); + if (is_custom_context) + n_enq = vlib_buffer_enqueue_to_thread_with_aux ( + vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); + else + n_enq = vlib_buffer_enqueue_to_thread ( + vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) vlib_node_increment_counter (vm, node->node_index, @@ -1449,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_handoff_node_inline (vm, node, frame, - false /* is_feature */ ); + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, false /* is_feature */, false /* is_custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { .name = "ip4-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1468,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ +VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, false /* is_feature */, true /* is_custom_context */); 
+} + +VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = { + .name = "ip4-sv-reassembly-custom-context-handoff", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings), + .error_strings = ip4_sv_reass_handoff_error_strings, + .format_trace = format_ip4_sv_reass_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; -/* *INDENT-OFF* */ VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_sv_reass_handoff_node_inline (vm, node, frame, - true /* is_feature */ ); + return ip4_sv_reass_handoff_node_inline ( + vm, node, frame, true /* is_feature */, false /* is_custom_context */); } -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { .name = "ip4-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1497,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int @@ -1535,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index) node_index); } +uword +ip4_sv_reass_custom_context_register_next_node (uword node_index) +{ + return vlib_node_add_next ( + vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index); +} + int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index e926dbeebcc..3a684eb9809 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); uword ip4_sv_reass_custom_register_next_node (uword node_index); +uword ip4_sv_reass_custom_context_register_next_node (uword node_index); #endif /* __included_ip4_sv_reass_h__ */ diff --git a/src/vnet/ip/reass/ip6_full_reass.c 
b/src/vnet/ip/reass/ip6_full_reass.c index 9ec40cd347c..27647985877 100644 --- a/src/vnet/ip/reass/ip6_full_reass.c +++ b/src/vnet/ip/reass/ip6_full_reass.c @@ -25,10 +25,14 @@ #include <vnet/ip/ip.h> #include <vppinfra/bihash_48_8.h> #include <vnet/ip/reass/ip6_full_reass.h> +#include <vnet/ip/ip6_inlines.h> #define MSEC_PER_SEC 1000 -#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default +#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 200 +/* As there are only 1024 reass context per thread, either the DDOS attacks + * or fractions of real timeouts, would consume these contexts quickly and + * running out context space and unable to perform reassembly */ +#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default #define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 #define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75) @@ -40,6 +44,8 @@ typedef enum IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS, IP6_FULL_REASS_RC_NO_BUF, IP6_FULL_REASS_RC_HANDOFF, + IP6_FULL_REASS_RC_INVALID_FRAG_LEN, + IP6_FULL_REASS_RC_OVERLAP, } ip6_full_reass_rc_t; typedef struct @@ -132,6 +138,8 @@ typedef struct ip6_full_reass_t *pool; u32 reass_n; u32 id_counter; + // for pacing the main thread timeouts + u32 last_id; clib_spinlock_t lock; } ip6_full_reass_per_thread_t; @@ -155,17 +163,20 @@ typedef struct // convenience vlib_main_t *vlib_main; - // node index of ip6-drop node - u32 ip6_drop_idx; u32 ip6_icmp_error_idx; u32 ip6_full_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; + u32 fq_local_index; u32 fq_feature_index; + u32 fq_custom_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + + // whether local fragmented packets are reassembled or not + int is_local_reass_enabled; } ip6_full_reass_main_t; extern ip6_full_reass_main_t ip6_full_reass_main; @@ -185,13 +196,22 @@ typedef enum typedef enum { 
+ NORMAL, + FEATURE, + CUSTOM +} ip6_full_reass_node_type_t; + +typedef enum +{ RANGE_NEW, + RANGE_DISCARD, RANGE_OVERLAP, ICMP_ERROR_RT_EXCEEDED, ICMP_ERROR_FL_TOO_BIG, ICMP_ERROR_FL_NOT_MULT_8, FINALIZE, HANDOFF, + PASSTHROUGH, } ip6_full_reass_trace_operation_e; typedef struct @@ -278,6 +298,10 @@ format_ip6_full_reass_trace (u8 * s, va_list * args) s = format (s, "\n%Unew %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); break; + case RANGE_DISCARD: + s = format (s, "\n%Udiscard %U", format_white_space, indent, + format_ip6_full_reass_range_trace, &t->trace_range); + break; case RANGE_OVERLAP: s = format (s, "\n%Uoverlap %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); @@ -304,6 +328,9 @@ format_ip6_full_reass_trace (u8 * s, va_list * args) format (s, "handoff from thread #%u to thread #%u", t->thread_id, t->thread_id_to); break; + case PASSTHROUGH: + s = format (s, "passthrough - not a fragment"); + break; } return s; } @@ -396,59 +423,69 @@ ip6_full_reass_free (ip6_full_reass_main_t * rm, ip6_full_reass_free_ctx (rt, reass); } +/* n_left_to_next, and to_next are taken as input params, as this function + * could be called from a graphnode, where its managing local copy of these + * variables, and ignoring those and still trying to enqueue the buffers + * with local variables would cause either buffer leak or corruption */ always_inline void ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, - ip6_full_reass_t *reass) + ip6_full_reass_t *reass, u32 *n_left_to_next, + u32 **to_next) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; u32 *to_free = NULL; + while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); range_vnb = vnet_buffer (range_b); - u32 bi = range_bi; - while (~0 != bi) + + if (~0 != range_bi) { - vec_add1 (to_free, bi); - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) 
- { - bi = b->next_buffer; - b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; - } - else - { - bi = ~0; - } + vec_add1 (to_free, range_bi); } range_bi = range_vnb->ip.reass.next_range_bi; } + /* send to next_error_index */ - if (~0 != reass->error_next_index) + if (~0 != reass->error_next_index && + reass->error_next_index < node->n_next_nodes) { - u32 n_left_to_next, *to_next, next_index; + u32 next_index; next_index = reass->error_next_index; u32 bi = ~0; + /* record number of packets sent to custom app */ + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_TO_CUSTOM_APP, + vec_len (to_free)); + while (vec_len (to_free) > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, *to_next, + (*n_left_to_next)); - while (vec_len (to_free) > 0 && n_left_to_next > 0) + while (vec_len (to_free) > 0 && (*n_left_to_next) > 0) { bi = vec_pop (to_free); if (~0 != bi) { - to_next[0] = bi; - to_next += 1; - n_left_to_next -= 1; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_full_reass_add_trace (vm, node, reass, bi, NULL, + RANGE_DISCARD, ~0); + } + *to_next[0] = bi; + (*to_next) += 1; + (*n_left_to_next) -= 1; } } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_put_next_frame (vm, node, next_index, (*n_left_to_next)); } } else @@ -459,8 +496,65 @@ ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline void -ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_full_reass_t * reass, u32 * icmp_bi) +sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip6_full_reass_t *reass, + u32 *bi0) +{ + u32 range_bi = reass->first_bi; + vlib_buffer_t *range_b; + vnet_buffer_opaque_t *range_vnb; + + while (~0 != range_bi) + { + range_b = vlib_get_buffer (vm, range_bi); + range_vnb = vnet_buffer (range_b); + u32 bi = range_bi; + if (~0 != bi) + { + if (bi == *bi0) + *bi0 = ~0; 
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + u32 _bi = bi; + vlib_buffer_t *_b = vlib_get_buffer (vm, _bi); + while (_b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + if (_b->next_buffer != range_vnb->ip.reass.next_range_bi) + { + _bi = _b->next_buffer; + _b = vlib_get_buffer (vm, _bi); + } + else + { + _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + break; + } + } + } + range_bi = range_vnb->ip.reass.next_range_bi; + } + } + if (*bi0 != ~0) + { + vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); + vnet_buffer_opaque_t *fvnb = vnet_buffer (fb); + if (~0 != reass->first_bi) + { + fvnb->ip.reass.next_range_bi = reass->first_bi; + reass->first_bi = *bi0; + } + else + { + reass->first_bi = *bi0; + fvnb->ip.reass.next_range_bi = ~0; + } + *bi0 = ~0; + } +} + +always_inline void +ip6_full_reass_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *node, + ip6_full_reass_t *reass, u32 *icmp_bi, + u32 *n_left_to_next, u32 **to_next) { if (~0 == reass->first_bi) { @@ -493,15 +587,16 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, 0); } } - ip6_full_reass_drop_all (vm, node, reass); + ip6_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next); } always_inline ip6_full_reass_t * -ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_full_reass_main_t * rm, - ip6_full_reass_per_thread_t * rt, - ip6_full_reass_kv_t * kv, u32 * icmp_bi, - u8 * do_handoff) +ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node, + ip6_full_reass_main_t *rm, + ip6_full_reass_per_thread_t *rt, + ip6_full_reass_kv_t *kv, u32 *icmp_bi, + u8 *do_handoff, int skip_bihash, + u32 *n_left_to_next, u32 **to_next) { ip6_full_reass_t *reass; f64 now; @@ -511,7 +606,7 @@ again: reass = NULL; now = vlib_time_now (vm); - if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv)) + if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv)) { if (vm->thread_index != kv->v.memory_owner_thread_index) { @@ -526,7 +621,10 @@ 
again: if (now > reass->last_heard + rm->timeout) { - ip6_full_reass_on_timeout (vm, node, reass, icmp_bi); + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_TIMEOUT, 1); + ip6_full_reass_on_timeout (vm, node, reass, icmp_bi, n_left_to_next, + to_next); ip6_full_reass_free (rm, rt, reass); reass = NULL; } @@ -554,27 +652,41 @@ again: reass->data_len = 0; reass->next_index = ~0; reass->error_next_index = ~0; + reass->memory_owner_thread_index = vm->thread_index; ++rt->reass_n; } - reass->key.as_u64[0] = kv->kv.key[0]; - reass->key.as_u64[1] = kv->kv.key[1]; - reass->key.as_u64[2] = kv->kv.key[2]; - reass->key.as_u64[3] = kv->kv.key[3]; - reass->key.as_u64[4] = kv->kv.key[4]; - reass->key.as_u64[5] = kv->kv.key[5]; kv->v.reass_index = (reass - rt->pool); kv->v.memory_owner_thread_index = vm->thread_index; reass->last_heard = now; - int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); - if (rv) + if (!skip_bihash) { - ip6_full_reass_free (rm, rt, reass); - reass = NULL; - // if other worker created a context already work with the other copy - if (-2 == rv) - goto again; + reass->key.as_u64[0] = kv->kv.key[0]; + reass->key.as_u64[1] = kv->kv.key[1]; + reass->key.as_u64[2] = kv->kv.key[2]; + reass->key.as_u64[3] = kv->kv.key[3]; + reass->key.as_u64[4] = kv->kv.key[4]; + reass->key.as_u64[5] = kv->kv.key[5]; + + int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); + if (rv) + { + ip6_full_reass_free (rm, rt, reass); + reass = NULL; + // if other worker created a context already work with the other copy + if (-2 == rv) + goto again; + } + } + else + { + reass->key.as_u64[0] = ~0; + reass->key.as_u64[1] = ~0; + reass->key.as_u64[2] = ~0; + reass->key.as_u64[3] = ~0; + reass->key.as_u64[4] = ~0; + reass->key.as_u64[5] = ~0; } return reass; @@ -593,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; - u32 buf_cnt = 0; - u32 
dropped_cnt = 0; u32 *vec_drop_compress = NULL; ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK; do @@ -636,19 +746,18 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { - ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) { /* drop whole buffer */ - vec_add1 (vec_drop_compress, tmp_bi); - trim_front -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } + trim_front -= tmp->current_length; + vec_add1 (vec_drop_compress, tmp_bi); tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT; tmp_bi = tmp->next_buffer; tmp = vlib_get_buffer (vm, tmp_bi); @@ -686,13 +795,12 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - vec_add1 (vec_drop_compress, tmp_bi); if (reass->first_bi == tmp_bi) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } - ++dropped_cnt; + vec_add1 (vec_drop_compress, tmp_bi); } if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -729,19 +837,27 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b); ip6_header_t *ip = vlib_buffer_get_current (first_b); u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset; - ip6_ext_header_t *prev_hdr; - frag_hdr = - ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION, - &prev_hdr); - if (prev_hdr) + ip6_ext_hdr_chain_t hdr_chain; + ip6_ext_header_t *prev_hdr = 0; + int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION, + &hdr_chain); + if (res < 0 || + (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)) { + rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; + goto free_buffers_and_return; + } + frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset); + if (res > 0) + { + prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset); 
prev_hdr->next_hdr = frag_hdr->next_hdr; } else { ip->protocol = frag_hdr->next_hdr; } - if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset)) + if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; @@ -799,6 +915,15 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, *next0 = reass->next_index; } vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; + /* Keep track of number of successfully reassembled packets and number of + * fragments reassembled */ + vlib_node_increment_counter (vm, node->node_index, IP6_ERROR_REASS_SUCCESS, + 1); + + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_FRAGMENTS_REASSEMBLED, + reass->fragments_n); + ip6_full_reass_free (rm, rt, reass); reass = NULL; free_buffers_and_return: @@ -834,12 +959,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm, } always_inline ip6_full_reass_rc_t -ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, - ip6_full_reass_main_t * rm, - ip6_full_reass_per_thread_t * rt, - ip6_full_reass_t * reass, u32 * bi0, u32 * next0, - u32 * error0, ip6_frag_hdr_t * frag_hdr, - bool is_custom_app, u32 * handoff_thread_idx) +ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, + ip6_full_reass_main_t *rm, + ip6_full_reass_per_thread_t *rt, + ip6_full_reass_t *reass, u32 *bi0, u32 *next0, + u32 *error0, ip6_frag_hdr_t *frag_hdr, + bool is_custom_app, u32 *handoff_thread_idx, + int skip_bihash) { int consumed = 0; vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0); @@ -865,6 +991,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fragment_length = vlib_buffer_length_in_chain (vm, fb) - (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); + if (0 == fragment_length) + { + return IP6_FULL_REASS_RC_INVALID_FRAG_LEN; + } u32 fragment_last = fvnb->ip.reass.fragment_last = fragment_first + fragment_length - 1; int 
more_fragments = ip6_frag_hdr_more (frag_hdr); @@ -929,11 +1059,7 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr, RANGE_OVERLAP, ~0); } - ip6_full_reass_drop_all (vm, node, reass); - ip6_full_reass_free (rm, rt, reass); - *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP; - *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT; - return IP6_FULL_REASS_RC_OK; + return IP6_FULL_REASS_RC_OVERLAP; } break; } @@ -947,6 +1073,12 @@ check_if_done_maybe: ~0); } } + else if (skip_bihash) + { + // if this reassembly is not in bihash, then the packet must have been + // consumed + return IP6_FULL_REASS_RC_INTERNAL_ERROR; + } if (~0 != reass->last_packet_octet && reass->data_len == reass->last_packet_octet + 1) { @@ -964,6 +1096,12 @@ check_if_done_maybe: } else { + if (skip_bihash) + { + // if this reassembly is not in bihash, it should've been an atomic + // fragment and thus finalized + return IP6_FULL_REASS_RC_INTERNAL_ERROR; + } if (consumed) { *bi0 = ~0; @@ -982,31 +1120,28 @@ check_if_done_maybe: } always_inline bool -ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t * node, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node, + vlib_buffer_t *b, + ip6_ext_hdr_chain_t *hc) { - ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr; - while (ip6_ext_hdr (tmp->next_hdr)) - { - tmp = ip6_ext_next_header (tmp); - } - if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr) + int nh = hc->eh[hc->length - 1].protocol; + /* Checking to see if it's a terminating header */ + if (ip6_ext_hdr (nh)) { - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, - 0); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0); b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER]; - return false; } return true; 
} always_inline bool -ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); ip6_header_t *ip = vlib_buffer_get_current (b); @@ -1019,15 +1154,17 @@ ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm, icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, ICMP6_parameter_problem_erroneous_header_field, (u8 *) & ip->payload_length - (u8 *) ip); + b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE]; return false; } return true; } always_inline bool -ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_buffer_t *b, + ip6_frag_hdr_t *frag_hdr) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr); @@ -1041,16 +1178,16 @@ ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm, ICMP6_parameter_problem_erroneous_header_field, (u8 *) & frag_hdr->fragment_offset_and_more - (u8 *) ip0); + b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE]; return false; } return true; } always_inline uword -ip6_full_reassembly_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature, - bool is_custom_app) +ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool is_custom_app, bool is_local) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, next_index; @@ -1077,55 +1214,95 @@ ip6_full_reassembly_inline (vlib_main_t * vm, ip6_header_t *ip0 = vlib_buffer_get_current (b0); ip6_frag_hdr_t *frag_hdr = NULL; - ip6_ext_header_t *prev_hdr; - if (ip6_ext_hdr (ip0->protocol)) + ip6_ext_hdr_chain_t 
hdr_chain; + vnet_buffer_opaque_t *fvnb = vnet_buffer (b0); + + int res = ip6_ext_header_walk ( + b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain); + if (res < 0 || + hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION) { - frag_hdr = - ip6_ext_header_find (vm, b0, ip0, - IP_PROTOCOL_IPV6_FRAGMENTATION, - &prev_hdr); + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_NO_FRAG_HDR, 1); + // this is a mangled packet - no fragmentation + next0 = is_custom_app ? fvnb->ip.reass.error_next_index : + IP6_FULL_REASSEMBLY_NEXT_DROP; + ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH, + ~0); + goto skip_reass; } - if (!frag_hdr) + if (is_local && !rm->is_local_reass_enabled) { - // this is a regular packet - no fragmentation - next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT; + next0 = IP6_FULL_REASSEMBLY_NEXT_DROP; goto skip_reass; } + + /* Keep track of received fragments */ + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_FRAGMENTS_RCVD, 1); + frag_hdr = + ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset); vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset = - (u8 *) frag_hdr - (u8 *) ip0; + hdr_chain.eh[res].offset; if (0 == ip6_frag_hdr_offset (frag_hdr)) { // first fragment - verify upper-layer is present - if (!ip6_full_reass_verify_upper_layer_present - (node, b0, frag_hdr)) + if (!ip6_full_reass_verify_upper_layer_present (node, b0, + &hdr_chain)) { - next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR; + next0 = is_custom_app ? fvnb->ip.reass.error_next_index : + IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR; goto skip_reass; } } - if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) || - !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr)) + + if (!ip6_full_reass_verify_fragment_multiple_8 (vm, node, b0, + frag_hdr) || + !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0, + frag_hdr)) { - next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR; + next0 = is_custom_app ? 
fvnb->ip.reass.error_next_index : + IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR; goto skip_reass; } + + int skip_bihash = 0; ip6_full_reass_kv_t kv; u8 do_handoff = 0; - kv.k.as_u64[0] = ip0->src_address.as_u64[0]; - kv.k.as_u64[1] = ip0->src_address.as_u64[1]; - kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; - kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; - kv.k.as_u64[4] = - ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 | - (u64) frag_hdr->identification; - kv.k.as_u64[5] = ip0->protocol; + if (0 == ip6_frag_hdr_offset (frag_hdr) && + !ip6_frag_hdr_more (frag_hdr)) + { + // this is atomic fragment and needs to be processed separately + skip_bihash = 1; + } + else + { + u32 fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ? + vec_elt (ip6_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]) : + vnet_buffer (b0)->sw_if_index[VLIB_TX]; + kv.k.as_u64[0] = ip0->src_address.as_u64[0]; + kv.k.as_u64[1] = ip0->src_address.as_u64[1]; + kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; + kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; + kv.k.as_u64[4] = + ((u64) fib_index) << 32 | (u64) frag_hdr->identification; + /* RFC 8200: The Next Header values in the Fragment headers of + * different fragments of the same original packet may differ. + * Only the value from the Offset zero fragment packet is used + * for reassembly. 
+ * + * Also, IPv6 Header doesnt contain the protocol value unlike + * IPv4.*/ + kv.k.as_u64[5] = 0; + } - ip6_full_reass_t *reass = - ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi, - &do_handoff); + ip6_full_reass_t *reass = ip6_full_reass_find_or_create ( + vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash, + &n_left_to_next, &to_next); if (reass) { @@ -1144,9 +1321,10 @@ ip6_full_reassembly_inline (vlib_main_t * vm, else if (reass) { u32 handoff_thread_idx; - switch (ip6_full_reass_update - (vm, node, rm, rt, reass, &bi0, &next0, &error0, - frag_hdr, is_custom_app, &handoff_thread_idx)) + u32 counter = ~0; + switch (ip6_full_reass_update ( + vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr, + is_custom_app, &handoff_thread_idx, skip_bihash)) { case IP6_FULL_REASS_RC_OK: /* nothing to do here */ @@ -1158,25 +1336,36 @@ ip6_full_reassembly_inline (vlib_main_t * vm, handoff_thread_idx; break; case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG, - 1); - ip6_full_reass_drop_all (vm, node, reass); - ip6_full_reass_free (rm, rt, reass); - goto next_packet; + counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; break; case IP6_FULL_REASS_RC_NO_BUF: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_NO_BUF, 1); - ip6_full_reass_drop_all (vm, node, reass); - ip6_full_reass_free (rm, rt, reass); - goto next_packet; + counter = IP6_ERROR_REASS_NO_BUF; + break; + case IP6_FULL_REASS_RC_INVALID_FRAG_LEN: + counter = IP6_ERROR_REASS_INVALID_FRAG_LEN; + break; + case IP6_FULL_REASS_RC_OVERLAP: + counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT; break; case IP6_FULL_REASS_RC_INTERNAL_ERROR: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_INTERNAL_ERROR, + counter = IP6_ERROR_REASS_INTERNAL_ERROR; + /* Sanitization is needed in internal error cases only, as + * the incoming packet is already dropped in other cases, + * also 
adding bi0 back to the reassembly list, fixes the + * leaking of buffers during internal errors. + * + * Also it doesnt make sense to send these buffers custom + * app, these fragments are with internal errors */ + sanitize_reass_buffers_add_missing (vm, reass, &bi0); + reass->error_next_index = ~0; + break; + } + if (~0 != counter) + { + vlib_node_increment_counter (vm, node->node_index, counter, 1); - ip6_full_reass_drop_all (vm, node, reass); + ip6_full_reass_drop_all (vm, node, reass, &n_left_to_next, + &to_next); ip6_full_reass_free (rm, rt, reass); goto next_packet; break; @@ -1190,7 +1379,6 @@ ip6_full_reassembly_inline (vlib_main_t * vm, } else { - vnet_buffer_opaque_t *fvnb = vnet_buffer (b0); next0 = fvnb->ip.reass.error_next_index; } error0 = IP6_ERROR_REASS_LIMIT_REACHED; @@ -1223,6 +1411,15 @@ ip6_full_reassembly_inline (vlib_main_t * vm, { vnet_feature_next (&next0, b0); } + + /* Increment the counter to-custom-app also as this fragment is + * also going to application */ + if (is_custom_app) + { + vlib_node_increment_counter ( + vm, node->node_index, IP6_ERROR_REASS_TO_CUSTOM_APP, 1); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } @@ -1249,26 +1446,21 @@ ip6_full_reassembly_inline (vlib_main_t * vm, return frame->n_vectors; } -static char *ip6_full_reassembly_error_strings[] = { -#define _(sym, string) string, - foreach_ip6_error -#undef _ -}; - VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */ , - false /* is_custom_app */ ); + return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */, + false /* is_custom_app */, + false /* is_local */); } VLIB_REGISTER_NODE (ip6_full_reass_node) = { .name = "ip6-full-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip6_full_reass_trace, - .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings), - 
.error_strings = ip6_full_reassembly_error_strings, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT, .next_nodes = { @@ -1279,20 +1471,45 @@ VLIB_REGISTER_NODE (ip6_full_reass_node) = { }, }; +VLIB_NODE_FN (ip6_local_full_reass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */, + false /* is_custom_app */, + true /* is_local */); +} + +VLIB_REGISTER_NODE (ip6_local_full_reass_node) = { + .name = "ip6-local-full-reassembly", + .vector_size = sizeof (u32), + .format_trace = format_ip6_full_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff", + }, +}; + VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */ , - false /* is_custom_app */ ); + return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */, + false /* is_custom_app */, + false /* is_local */); } VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = { .name = "ip6-full-reassembly-feature", .vector_size = sizeof (u32), .format_trace = format_ip6_full_reass_trace, - .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings), - .error_strings = ip6_full_reassembly_error_strings, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT, .next_nodes = { @@ -1311,6 +1528,30 @@ VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = { .runs_after = 0, }; +VLIB_NODE_FN (ip6_full_reass_node_custom) +(vlib_main_t *vm, vlib_node_runtime_t 
*node, vlib_frame_t *frame) +{ + return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */, + true /* is_custom_app */, + false /* is_local */); +} + +VLIB_REGISTER_NODE (ip6_full_reass_node_custom) = { + .name = "ip6-full-reassembly-custom", + .vector_size = sizeof (u32), + .format_trace = format_ip6_full_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-custom-hoff", + }, +}; + #ifndef CLIB_MARCH_VARIANT static u32 ip6_full_reass_get_nbuckets () @@ -1319,7 +1560,9 @@ ip6_full_reass_get_nbuckets () u32 nbuckets; u8 i; - nbuckets = (u32) (rm->max_reass_n / IP6_FULL_REASS_HT_LOAD_FACTOR); + /* need more mem with more workers */ + nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) / + IP6_FULL_REASS_HT_LOAD_FACTOR); for (i = 0; i < 31; i++) if ((1 << i) >= nbuckets) @@ -1446,9 +1689,6 @@ ip6_full_reass_init_function (vlib_main_t * vm) clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets, nbuckets * 1024); - node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop"); - ASSERT (node); - rm->ip6_drop_idx = node->index; node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error"); ASSERT (node); rm->ip6_icmp_error_idx = node->index; @@ -1456,11 +1696,16 @@ ip6_full_reass_init_function (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, ip_main_init))) return error; ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION, - ip6_full_reass_node.index); + ip6_local_full_reass_node.index); + rm->is_local_reass_enabled = 1; rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0); + rm->fq_local_index = + vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init 
(ip6_full_reass_node_feature.index, 0); + rm->fq_custom_index = + vlib_frame_queue_main_init (ip6_full_reass_node_custom.index, 0); rm->feature_use_refcount_per_intf = NULL; return error; @@ -1504,26 +1749,53 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, int index; const uword nthreads = vlib_num_workers () + 1; u32 *vec_icmp_bi = NULL; + u32 n_left_to_next, *to_next; + for (thread_index = 0; thread_index < nthreads; ++thread_index) { ip6_full_reass_per_thread_t *rt = &rm->per_thread_data[thread_index]; + u32 reass_timeout_cnt = 0; clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } - } + /* Pace the number of timeouts handled per thread,to avoid barrier + * sync issues in real world scenarios */ + + u32 beg = rt->last_id; + /* to ensure we walk at least once per sec per context */ + u32 end = beg + (IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT * + IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / + MSEC_PER_SEC + + 1); + if (end > vec_len (rt->pool)) + { + end = vec_len (rt->pool); + rt->last_id = 0; + } + else + { + rt->last_id = end; + } + + pool_foreach_stepping_index (index, beg, end, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } + int *i; vec_foreach (i, pool_indexes_to_free) { ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); u32 icmp_bi = ~0; - ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi); + + reass_timeout_cnt += reass->fragments_n; + ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi, + &n_left_to_next, &to_next); if (~0 != icmp_bi) vec_add1 (vec_icmp_bi, icmp_bi); @@ -1531,6 +1803,10 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, } clib_spinlock_unlock (&rt->lock); + 
if (reass_timeout_cnt) + vlib_node_increment_counter (vm, node->node_index, + IP6_ERROR_REASS_TIMEOUT, + reass_timeout_cnt); } while (vec_len (vec_icmp_bi) > 0) @@ -1546,7 +1822,6 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) trace_frame = 1; - b->error = node->errors[IP6_ERROR_REASS_TIMEOUT]; to_next[0] = bi; ++f->n_vectors; to_next += 1; @@ -1560,7 +1835,7 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, vec_free (vec_icmp_bi); if (event_data) { - _vec_len (event_data) = 0; + vec_set_len (event_data, 0); } } @@ -1568,14 +1843,13 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, } VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = { - .function = ip6_full_reass_walk_expired, - .format_trace = format_ip6_full_reass_trace, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ip6-full-reassembly-expire-walk", - - .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings), - .error_strings = ip6_full_reassembly_error_strings, + .function = ip6_full_reass_walk_expired, + .format_trace = format_ip6_full_reass_trace, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ip6-full-reassembly-expire-walk", + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, }; static u8 * @@ -1733,9 +2007,10 @@ format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip6_full_reassembly_handoff_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, + ip6_full_reass_node_type_t type, + bool is_local) { ip6_full_reass_main_t *rm = &ip6_full_reass_main; @@ -1751,8 +2026,28 @@ ip6_full_reassembly_handoff_inline (vlib_main_t * vm, b = bufs; ti = thread_indices; - fq_index = (is_feature) ? 
rm->fq_feature_index : rm->fq_index; - + switch (type) + { + case NORMAL: + if (is_local) + { + fq_index = rm->fq_local_index; + } + else + { + fq_index = rm->fq_index; + } + break; + case FEATURE: + fq_index = rm->fq_feature_index; + break; + case CUSTOM: + fq_index = rm->fq_custom_index; + break; + default: + clib_warning ("Unexpected `type' (%d)!", type); + ASSERT (0); + } while (n_left_from > 0) { ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index; @@ -1784,8 +2079,8 @@ VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_full_reassembly_handoff_inline (vm, node, frame, - false /* is_feature */ ); + return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL, + false /* is_local */); } VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = { @@ -1802,14 +2097,34 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = { }, }; +VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL, + true /* is_feature */); +} + +VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = { + .name = "ip6-local-full-reassembly-handoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings), + .error_strings = ip6_full_reassembly_handoff_error_strings, + .format_trace = format_ip6_full_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_full_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ ); + return ip6_full_reassembly_handoff_inline (vm, node, frame, FEATURE, + false /* is_local */); } - VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = { .name = "ip6-full-reass-feature-hoff", 
.vector_size = sizeof (u32), @@ -1824,6 +2139,27 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = { }, }; +VLIB_NODE_FN (ip6_full_reassembly_custom_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_full_reassembly_handoff_inline (vm, node, frame, CUSTOM, + false /* is_local */); +} + +VLIB_REGISTER_NODE (ip6_full_reassembly_custom_handoff_node) = { + .name = "ip6-full-reass-custom-hoff", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings), + .error_strings = ip6_full_reassembly_handoff_error_strings, + .format_trace = format_ip6_full_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + #ifndef CLIB_MARCH_VARIANT int ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) @@ -1849,8 +2185,37 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) "ip6-full-reassembly-feature", sw_if_index, 0, 0, 0); } - return -1; + return 0; +} + +void +ip6_local_full_reass_enable_disable (int enable) +{ + if (enable) + { + if (!ip6_full_reass_main.is_local_reass_enabled) + { + ip6_full_reass_main.is_local_reass_enabled = 1; + ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION, + ip6_local_full_reass_node.index); + } + } + else + { + if (ip6_full_reass_main.is_local_reass_enabled) + { + ip6_full_reass_main.is_local_reass_enabled = 0; + ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION); + } + } +} + +int +ip6_local_full_reass_enabled () +{ + return ip6_full_reass_main.is_local_reass_enabled; } + #endif /* diff --git a/src/vnet/ip/reass/ip6_full_reass.h b/src/vnet/ip/reass/ip6_full_reass.h index 546075b04b4..f66cb67d796 100644 --- a/src/vnet/ip/reass/ip6_full_reass.h +++ b/src/vnet/ip/reass/ip6_full_reass.h @@ -46,6 +46,8 @@ vnet_api_error_t ip6_full_reass_enable_disable (u32 sw_if_index, int ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +void 
ip6_local_full_reass_enable_disable (int enable); +int ip6_local_full_reass_enabled (); #endif /* __included_ip6_full_reass_h */ /* diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index 28941311f50..fe2ed05555c 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -26,6 +26,7 @@ #include <vnet/ip/ip6_to_ip4.h> #include <vppinfra/bihash_48_8.h> #include <vnet/ip/reass/ip6_sv_reass.h> +#include <vnet/ip/ip6_inlines.h> #define MSEC_PER_SEC 1000 #define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100 @@ -40,6 +41,7 @@ typedef enum IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS, IP6_SV_REASS_RC_INTERNAL_ERROR, IP6_SV_REASS_RC_UNSUPP_IP_PROTO, + IP6_SV_REASS_RC_INVALID_FRAG_LEN, } ip6_sv_reass_rc_t; typedef struct @@ -50,7 +52,7 @@ typedef struct { ip6_address_t src; ip6_address_t dst; - u32 xx_id; + u32 fib_index; u32 frag_id; u8 unused[7]; u8 proto; @@ -148,6 +150,7 @@ typedef struct /** Worker handoff */ u32 fq_index; u32 fq_feature_index; + u32 fq_custom_context_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; @@ -214,7 +217,7 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) clib_net_to_host_u16 (t->l4_dst_port)); break; case REASS_PASSTHROUGH: - s = format (s, "[not-fragmented]"); + s = format (s, "[not fragmented or atomic fragment]"); break; } return s; @@ -309,6 +312,8 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass = NULL; f64 now = vlib_time_now (vm); +again: + if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv)) { if (vm->thread_index != kv->v.thread_index) @@ -368,10 +373,14 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm, kv->v.thread_index = vm->thread_index; reass->last_heard = now; - if (clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 1)) + int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); + if (rv) { ip6_sv_reass_free (vm, rm, rt, reass); reass = NULL; + // if 
other worker created a context already work with the other copy + if (-2 == rv) + goto again; } return reass; @@ -399,6 +408,10 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, u32 fragment_length = vlib_buffer_length_in_chain (vm, fb) - (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); + if (0 == fragment_length) + { + return IP6_SV_REASS_RC_INVALID_FRAG_LEN; + } u32 fragment_last = fvnb->ip.reass.fragment_last = fragment_first + fragment_length - 1; fvnb->ip.reass.range_first = fragment_first; @@ -440,22 +453,18 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node, } always_inline bool -ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t * node, - vlib_buffer_t * b, - ip6_frag_hdr_t * frag_hdr) +ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node, + vlib_buffer_t *b, + ip6_ext_hdr_chain_t *hc) { - ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr; - while (ip6_ext_hdr (tmp->next_hdr)) + int nh = hc->eh[hc->length - 1].protocol; + /* Checking to see if it's a terminating header */ + if (ip6_ext_hdr (nh)) { - tmp = ip6_ext_next_header (tmp); - } - if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr) - { - icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem, - ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, - 0); + icmp6_error_set_vnet_buffer ( + b, ICMP6_parameter_problem, + ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0); b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER]; - return false; } return true; @@ -505,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm, } always_inline uword -ip6_sv_reassembly_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool custom_next, bool custom_context) { u32 *from = vlib_frame_vector_args (frame); - u32 n_left_from, n_left_to_next, 
*to_next, next_index; + u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index; ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index]; + u32 *context; + if (custom_context) + context = vlib_frame_aux_args (frame); + clib_spinlock_lock (&rt->lock); n_left_from = frame->n_vectors; @@ -520,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, while (n_left_from > 0) { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + if (custom_context) + vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, + to_next_aux, n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { @@ -528,23 +545,31 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, vlib_buffer_t *b0; u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP; u32 error0 = IP6_ERROR_NONE; - + u8 forward_context = 0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); ip6_header_t *ip0 = vlib_buffer_get_current (b0); - ip6_frag_hdr_t *frag_hdr = NULL; - ip6_ext_header_t *prev_hdr; - if (ip6_ext_hdr (ip0->protocol)) + ip6_frag_hdr_t *frag_hdr; + ip6_ext_hdr_chain_t hdr_chain; + bool is_atomic_fragment = false; + + int res = ip6_ext_header_walk ( + b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain); + if (res >= 0 && + hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { frag_hdr = - ip6_ext_header_find (vm, b0, ip0, - IP_PROTOCOL_IPV6_FRAGMENTATION, - &prev_hdr); + ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset); + is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) && + !ip6_frag_hdr_more (frag_hdr)); } - if (!frag_hdr) + + if (res < 0 || + hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION || + is_atomic_fragment) { - // this is a regular packet - no fragmentation + // this is a regular unfragmented packet or an atomic fragment if (!ip6_get_port (vm, b0, ip0, b0->current_length, &(vnet_buffer 
(b0)->ip.reass.ip_proto), @@ -560,7 +585,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, goto packet_enqueue; } vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; - next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -571,13 +597,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, } goto packet_enqueue; } + vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset = - (u8 *) frag_hdr - (u8 *) ip0; + hdr_chain.eh[res].offset; + if (0 == ip6_frag_hdr_offset (frag_hdr)) { // first fragment - verify upper-layer is present - if (!ip6_sv_reass_verify_upper_layer_present - (node, b0, frag_hdr)) + if (!ip6_sv_reass_verify_upper_layer_present (node, b0, + &hdr_chain)) { next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR; goto packet_enqueue; @@ -597,10 +625,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, kv.k.as_u64[1] = ip0->src_address.as_u64[1]; kv.k.as_u64[2] = ip0->dst_address.as_u64[0]; kv.k.as_u64[3] = ip0->dst_address.as_u64[1]; - kv.k.as_u64[4] = - ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 | - (u64) frag_hdr->identification; + if (custom_context) + kv.k.as_u64[4] = + (u64) *context << 32 | (u64) frag_hdr->identification; + else + kv.k.as_u64[4] = + ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX])) + << 32 | + (u64) frag_hdr->identification; kv.k.as_u64[5] = ip0->protocol; ip6_sv_reass_t *reass = @@ -611,6 +644,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF; vnet_buffer (b0)->ip.reass.owner_thread_index = kv.v.thread_index; + if (custom_context) + forward_context = 1; goto packet_enqueue; } @@ -635,7 +670,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = 
reass->l4_dst_port; - next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index : + IP6_SV_REASSEMBLY_NEXT_INPUT; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { ip6_sv_reass_add_trace ( @@ -645,31 +681,30 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, goto packet_enqueue; } + u32 counter = ~0; switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr)) { case IP6_SV_REASS_RC_OK: /* nothing to do here */ break; case IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG, - 1); - ip6_sv_reass_free (vm, rm, rt, reass); - goto next_packet; + counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG; break; case IP6_SV_REASS_RC_UNSUPP_IP_PROTO: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_UNSUPP_IP_PROTO, - 1); - ip6_sv_reass_free (vm, rm, rt, reass); - goto next_packet; + counter = IP6_ERROR_REASS_UNSUPP_IP_PROTO; break; case IP6_SV_REASS_RC_INTERNAL_ERROR: - vlib_node_increment_counter (vm, node->node_index, - IP6_ERROR_REASS_INTERNAL_ERROR, 1); + counter = IP6_ERROR_REASS_INTERNAL_ERROR; + break; + case IP6_SV_REASS_RC_INVALID_FRAG_LEN: + counter = IP6_ERROR_REASS_INVALID_FRAG_LEN; + break; + } + if (~0 != counter) + { + vlib_node_increment_counter (vm, node->node_index, counter, 1); ip6_sv_reass_free (vm, rm, rt, reass); goto next_packet; - break; } if (reass->is_complete) @@ -717,7 +752,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, to_next, n_left_to_next, bi0, next0); } - _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now + vec_set_len (reass->cached_buffers, + 0); // buffers are owned by frame now } goto next_packet; @@ -730,11 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); + if (custom_context && forward_context) + { + if 
(to_next_aux) + { + to_next_aux[0] = *context; + to_next_aux += 1; + } + vlib_validate_buffer_enqueue_with_aux_x1 ( + vm, node, next_index, to_next, to_next_aux, n_left_to_next, + bi0, *context, next0); + } + else + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); next_packet: from += 1; + if (custom_context) + context += 1; n_left_from -= 1; } @@ -745,26 +795,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, return frame->n_vectors; } -static char *ip6_sv_reassembly_error_strings[] = { -#define _(sym, string) string, - foreach_ip6_error -#undef _ -}; - VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ ); + return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, + false /* custom next */, + false /* custom context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { .name = "ip6-sv-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip6_sv_reass_trace, - .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings), - .error_strings = ip6_sv_reassembly_error_strings, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -774,22 +819,22 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = { [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ ); + return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */, + false /* custom next */, + false /* custom context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { .name = "ip6-sv-reassembly-feature", .vector_size = sizeof (u32), .format_trace = format_ip6_sv_reass_trace, - .n_errors = 
ARRAY_LEN (ip6_sv_reassembly_error_strings), - .error_strings = ip6_sv_reassembly_error_strings, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, .next_nodes = { @@ -799,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = { [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = { .arc_name = "ip6-unicast", .node_name = "ip6-sv-reassembly-feature", .runs_before = VNET_FEATURES ("ip6-lookup"), .runs_after = 0, }; -/* *INDENT-ON* */ + +VLIB_NODE_FN (ip6_sv_reass_custom_context_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */, + true /* custom next */, + true /* custom context */); +} + +VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = { + .name = "ip6-sv-reassembly-custom-context", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .format_trace = format_ip6_sv_reass_trace, + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, + .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT, + .next_nodes = + { + [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input", + [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop", + [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff", + }, +}; #ifndef CLIB_MARCH_VARIANT static u32 @@ -959,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm) rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0); rm->fq_feature_index = vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0); + rm->fq_custom_context_index = + vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0); rm->feature_use_refcount_per_intf = NULL; @@ -1009,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, clib_spinlock_lock (&rt->lock); vec_reset_length (pool_indexes_to_free); 
- /* *INDENT-OFF* */ pool_foreach_index (index, rt->pool) { reass = pool_elt_at_index (rt->pool, index); if (now > reass->last_heard + rm->timeout) @@ -1017,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, vec_add1 (pool_indexes_to_free, index); } } - /* *INDENT-ON* */ int *i; - /* *INDENT-OFF* */ vec_foreach (i, pool_indexes_to_free) { ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]); ip6_sv_reass_free (vm, rm, rt, reass); } - /* *INDENT-ON* */ clib_spinlock_unlock (&rt->lock); } @@ -1033,33 +1098,31 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm, vec_free (pool_indexes_to_free); if (event_data) { - _vec_len (event_data) = 0; + vec_set_len (event_data, 0); } } return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = { - .function = ip6_sv_reass_walk_expired, - .format_trace = format_ip6_sv_reass_trace, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ip6-sv-reassembly-expire-walk", - - .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings), - .error_strings = ip6_sv_reassembly_error_strings, + .function = ip6_sv_reass_walk_expired, + .format_trace = format_ip6_sv_reass_trace, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ip6-sv-reassembly-expire-walk", + .n_errors = IP6_N_ERROR, + .error_counters = ip6_error_counters, }; -/* *INDENT-ON* */ static u8 * format_ip6_sv_reass_key (u8 * s, va_list * args) { ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *); - s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u", - key->xx_id, format_ip6_address, &key->src, format_ip6_address, - &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto); + s = + format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u", + key->fib_index, format_ip6_address, &key->src, format_ip6_address, + &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto); return s; } @@ -1116,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, clib_spinlock_lock (&rt->lock); if (details) { - /* *INDENT-OFF* 
*/ pool_foreach (reass, rt->pool) { vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass); } - /* *INDENT-ON* */ } sum_reass_n += rt->reass_n; clib_spinlock_unlock (&rt->lock); @@ -1146,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { .path = "show ip6-sv-reassembly", .short_help = "show ip6-sv-reassembly [details]", .function = show_ip6_sv_reass, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT vnet_api_error_t @@ -1202,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args) } always_inline uword -ip6_sv_reassembly_handoff_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature) +ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_feature, + bool custom_context) { ip6_sv_reass_main_t *rm = &ip6_sv_reass_main; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; - u32 n_enq, n_left_from, *from; + u32 n_enq, n_left_from, *from, *context; u16 thread_indices[VLIB_FRAME_SIZE], *ti; u32 fq_index; from = vlib_frame_vector_args (frame); + if (custom_context) + context = vlib_frame_aux_args (frame); n_left_from = frame->n_vectors; vlib_get_buffers (vm, from, bufs, n_left_from); b = bufs; ti = thread_indices; - fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index; + fq_index = (is_feature) ? + rm->fq_feature_index : + (custom_context ? 
rm->fq_custom_context_index : rm->fq_index); while (n_left_from > 0) { @@ -1239,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm, ti += 1; b += 1; } - n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from, - thread_indices, frame->n_vectors, 1); + if (custom_context) + n_enq = vlib_buffer_enqueue_to_thread_with_aux ( + vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1); + else + n_enq = vlib_buffer_enqueue_to_thread ( + vm, node, fq_index, from, thread_indices, frame->n_vectors, 1); if (n_enq < frame->n_vectors) vlib_node_increment_counter (vm, node->node_index, @@ -1253,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_handoff_inline (vm, node, frame, - false /* is_feature */ ); + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, false /* is_feature */, false /* custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { .name = "ip6-sv-reassembly-handoff", .vector_size = sizeof (u32), @@ -1276,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = { VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ ); + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, true /* is_feature */, false /* custom_context */); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { .name = "ip6-sv-reass-feature-hoff", .vector_size = sizeof (u32), @@ -1294,7 +1360,28 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ + +VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return ip6_sv_reassembly_handoff_inline ( + vm, node, frame, false /* 
is_feature */, true /* custom_context */); +} + +VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = { + .name = "ip6-sv-reassembly-custom-context-handoff", + .vector_size = sizeof (u32), + .aux_size = sizeof (u32), + .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings), + .error_strings = ip6_sv_reassembly_handoff_error_strings, + .format_trace = format_ip6_sv_reassembly_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; #ifndef CLIB_MARCH_VARIANT int @@ -1323,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) } return 0; } + +uword +ip6_sv_reass_custom_context_register_next_node (uword node_index) +{ + return vlib_node_add_next ( + vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index, + node_index); +} #endif /* diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h index 81ac2312bdf..7dc9df132dd 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.h +++ b/src/vnet/ip/reass/ip6_sv_reass.h @@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable); int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +uword ip6_sv_reass_custom_context_register_next_node (uword node_index); #endif /* __included_ip6_sv_reass_h */ diff --git a/src/vnet/ip/reass/reassembly.rst b/src/vnet/ip/reass/reassembly.rst new file mode 100644 index 00000000000..49e0a8de6e6 --- /dev/null +++ b/src/vnet/ip/reass/reassembly.rst @@ -0,0 +1,221 @@ +.. _reassembly: + +IP Reassembly +============= + +Some VPP functions need access to whole packet and/or stream +classification based on L4 headers. Reassembly functionality allows +both former and latter. + +Full reassembly vs shallow (virtual) reassembly +----------------------------------------------- + +There are two kinds of reassembly available in VPP: + +1. 
Full reassembly changes a stream of packet fragments into one +packet containing all data reassembled with fragment bits cleared +and fragment header stripped (in case of ip6). Note that resulting +packet may come out of reassembly as a buffer chain. Because it's +impractical to parse headers which are split over multiple vnet +buffers, vnet_buffer_chain_linearize() is called after reassembly so +that L2/L3/L4 headers can be found in first buffer. Full reassembly +is costly and shouldn't be used unless necessary. Full reassembly is by +default enabled for both ipv4 and ipv6 "for us" traffic +- that is packets aimed at VPP addresses. This can be disabled via API +if desired, in which case "for us" fragments are dropped. + +2. Shallow (virtual) reassembly allows various classifying and/or +translating features to work with fragments without having to +understand fragmentation. It works by extracting L4 data and adding +them to vnet_buffer for each packet/fragment passing through SVR +nodes. This operation is performed for both fragments and regular +packets, allowing consuming code to treat all packets in same way. SVR +caches incoming packet fragments (buffers) until first fragment is +seen. Then it extracts L4 data from that first fragment, fills it for +any cached fragments and transmits them in the same order as they were +received. From that point on, any other passing fragments get L4 data +populated in vnet_buffer based on reassembly context. + +Multi-worker behaviour +^^^^^^^^^^^^^^^^^^^^^^ + +Both reassembly types deal with fragments arriving on different workers +via handoff mechanism. All reassembly contexts are stored in pools. +Bihash mapping 5-tuple key to a value containing pool index and thread +index is used for lookups. When a lookup finds an existing reassembly on +a different thread, it hands off the fragment to that thread. If lookup +fails, a new reassembly context is created and current worker becomes +owner of that context. 
Further fragments received on other worker +threads are then handed off owner worker thread. + +Full reassembly also remembers thread index where first fragment (as in +fragment with fragment offset 0) was seen and uses handoff mechanism to +send the reassembled packet out on that thread even if pool owner is +a different thread. This then requires an additional handoff to free +reassembly context as only pool owner can do that in a thread-safe way. + +Limits +^^^^^^ + +Because reassembly could be an attack vector, there is a configurable +limit on the number of concurrent reassemblies and also maximum +fragments per packet. + +Custom applications +^^^^^^^^^^^^^^^^^^^ + +Both reassembly features allow to be used by custom application which +are not part of VPP source tree. Be it patches or 3rd party plugins, +they can build their own graph paths by using "-custom*" versions of +nodes. Reassembly then reads next_index and error_next_index for each +buffer from vnet_buffer, allowing custom application to steer +both reassembled packets and any packets which are considered an error +in a way the custom application requires. + +Full reassembly +--------------- + +Configuration +^^^^^^^^^^^^^ + +Configuration is via API (``ip_reassembly_enable_disable``) or CLI: + +``set interface reassembly <interface-name> [on|off|ip4|ip6]`` + +here ``on`` means both ip4 and ip6. + +A show command is provided to see reassembly contexts: + +For ip4: + +``show ip4-full-reassembly [details]`` + +For ip6: + +``show ip6-full-reassembly [details]`` + +Global full reassembly parameters can be modified using API +``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``. 
+ +Defaults +"""""""" + +For defaults values, see #defines in + +`ip4_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_full_reass.c>`_ + +========================================= ========================================== +#define description +----------------------------------------- ------------------------------------------ +IP4_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds +IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions +IP4_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies +IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly +========================================= ========================================== + +and + +`ip6_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_full_reass.c>`_ + +========================================= ========================================== +#define description +----------------------------------------- ------------------------------------------ +IP6_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds +IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions +IP6_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies +IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly +========================================= ========================================== + +Finished/expired contexts +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Reassembly contexts are freed either when reassembly is finished - when +all data has been received or in case of timeout. There is a process +walking all reassemblies, freeing any expired ones. + +Shallow (virtual) reassembly +---------------------------- + +Configuration +^^^^^^^^^^^^^ + +Configuration is via API (``ip_reassembly_enable_disable``) only as +there is no value in turning SVR on by hand without a feature consuming +buffer metadata. SVR is designed to be turned on by a feature requiring +it in a programmatic way. 
+ +A show command is provided to see reassembly contexts: + +For ip4: + +``show ip4-sv-reassembly [details]`` + +For ip6: + +``show ip6-sv-reassembly [details]`` + +Global shallow reassembly parameters can be modified using API +``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``. + +Defaults +"""""""" + +For defaults values, see #defines in + +`ip4_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_sv_reass.c>`_ + +============================================ ========================================== +#define description +-------------------------------------------- ------------------------------------------ +IP4_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds +IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions +IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies +IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly +============================================ ========================================== + +and + +`ip6_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_sv_reass.c>`_ + +============================================ ========================================== +#define description +-------------------------------------------- ------------------------------------------ +IP6_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds +IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions +IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies +IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly +============================================ ========================================== + +Expiring contexts +^^^^^^^^^^^^^^^^^ + +There is no way of knowing when a reassembly is finished without +performing (an almost) full reassembly, so contexts in SVR cannot be +freed in the same way as in full reassembly. Instead a different +approach is taken. 
Least recently used (LRU) list is maintained where +reassembly contexts are ordered based on last update. The oldest +context is then freed whenever SVR hits limit on number of concurrent +reassembly contexts. There is also a process reaping expired sessions +similar as in full reassembly. + +Truncated packets +^^^^^^^^^^^^^^^^^ + +When SVR detects that a packet has been truncated in a way where L4 +headers are not available, it will mark it as such in vnet_buffer, +allowing downstream features to handle such packets as they deem fit. + +Fast path/slow path +^^^^^^^^^^^^^^^^^^^ + +SVR runs is implemented fast path/slow path way. By default, it assumes +that any passing traffic doesn't contain fragments, processing buffers +in a dual-loop. If it sees a fragment, it then jumps to single-loop +processing. + +Feature enabled by other features/reference counting +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SVR feature is enabled by some other features, like NAT, when those +features are enabled. For this to work, it implements a reference +counted API for enabling/disabling SVR. diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h index 92e8002e55a..97e74429e88 100644 --- a/src/vnet/ip/vtep.h +++ b/src/vnet/ip/vtep.h @@ -29,7 +29,6 @@ * processing and go directly to the tunnel protocol handler node. */ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { union { @@ -40,7 +39,6 @@ typedef CLIB_PACKED u64 as_u64; }; }) vtep4_key_t; -/* *INDENT-ON* */ /** * @brief Tunnel endpoint key (IPv6) @@ -51,13 +49,11 @@ typedef CLIB_PACKED * processing and go directly to the tunnel protocol handler node. 
*/ -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip6_address_t addr; u32 fib_index; }) vtep6_key_t; -/* *INDENT-ON* */ typedef struct { @@ -111,13 +107,13 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40, return VTEP_CHECK_PASS; } -#ifdef CLIB_HAVE_VEC512 typedef struct { vtep4_key_t vtep4_cache[8]; int idx; } vtep4_cache_t; +#ifdef CLIB_HAVE_VEC512 always_inline u8 vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40, vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512) |