diff options
Diffstat (limited to 'src/vnet/ip')
-rw-r--r-- | src/vnet/ip/ip4_input.c | 305 | ||||
-rw-r--r-- | src/vnet/ip/ip4_input.h | 156 |
2 files changed, 336 insertions, 125 deletions
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index 61c86ef112d..7af2bee187a 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -47,6 +47,7 @@ typedef struct u8 packet_data[64]; } ip4_input_trace_t; +#ifndef CLIB_MULTIARCH_VARIANT static u8 * format_ip4_input_trace (u8 * s, va_list * va) { @@ -59,6 +60,61 @@ format_ip4_input_trace (u8 * s, va_list * va) return s; } +#endif + +static_always_inline u32 +ip4_input_set_next (u32 sw_if_index, vlib_buffer_t * b, int arc_enabled) +{ + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 next; + u8 arc; + + ip4_header_t *ip = vlib_buffer_get_current (b); + + if (PREDICT_FALSE (ip4_address_is_multicast (&ip->dst_address))) + { + next = IP4_INPUT_NEXT_LOOKUP_MULTICAST; + arc = lm->mcast_feature_arc_index; + } + else + { + next = IP4_INPUT_NEXT_LOOKUP; + arc = lm->ucast_feature_arc_index; + } + + if (arc_enabled) + vnet_feature_arc_start (arc, sw_if_index, &next, b); + + return next; +} + +static_always_inline void +ip4_input_check_sw_if_index (vlib_simple_counter_main_t * cm, u32 sw_if_index, + u32 * last_sw_if_index, u32 * cnt, + int *arc_enabled) +{ + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 thread_index; + if (*last_sw_if_index == sw_if_index) + { + (*cnt)++; + return; + } + + thread_index = vlib_get_thread_index (); + if (*cnt) + vlib_increment_simple_counter (cm, thread_index, *last_sw_if_index, *cnt); + *cnt = 1; + *last_sw_if_index = sw_if_index; + + if (vnet_have_features (lm->ucast_feature_arc_index, sw_if_index) || + vnet_have_features (lm->mcast_feature_arc_index, sw_if_index)) + *arc_enabled = 1; + else + *arc_enabled = 0; +} /* Validate IP v4 packets and pass them either to forwarding code or drop/punt exception packets. */ @@ -67,19 +123,22 @@ ip4_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int verify_checksum) { - ip4_main_t *im = &ip4_main; vnet_main_t *vnm = vnet_get_main (); - ip_lookup_main_t *lm = &im->lookup_main; - u32 n_left_from, *from, *to_next; - ip4_input_next_t next_index; + u32 n_left_from, *from; + u32 thread_index = vlib_get_thread_index (); vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); vlib_simple_counter_main_t *cm; - u32 thread_index = vlib_get_thread_index (); + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + ip4_header_t *ip[4]; + u16 nexts[VLIB_FRAME_SIZE], *next; + u32 sw_if_index[4]; + u32 last_sw_if_index = ~0; + u32 cnt = 0; + int arc_enabled = 0; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; - next_index = node->cached_next_index; if (node->flags & VLIB_NODE_FLAG_TRACE) vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, @@ -89,133 +148,112 @@ ip4_input_inline (vlib_main_t * vm, cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_IP4); - while (n_left_from > 0) + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; + while (n_left_from >= 4) { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + u32 x = 0; - while (n_left_from >= 4 && n_left_to_next >= 2) + /* Prefetch next iteration. */ + if (n_left_from >= 12) { - vlib_buffer_t *p0, *p1; - ip4_header_t *ip0, *ip1; - u32 sw_if_index0, pi0, next0; - u32 sw_if_index1, pi1, next1; - u8 arc0, arc1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); - } - - to_next[0] = pi0 = from[0]; - to_next[1] = pi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); + vlib_prefetch_buffer_header (b[8], LOAD); + vlib_prefetch_buffer_header (b[9], LOAD); + vlib_prefetch_buffer_header (b[10], LOAD); + vlib_prefetch_buffer_header (b[11], LOAD); + + CLIB_PREFETCH (b[4]->data, sizeof (ip4_header_t), LOAD); + CLIB_PREFETCH (b[5]->data, sizeof (ip4_header_t), LOAD); + CLIB_PREFETCH (b[6]->data, sizeof (ip4_header_t), LOAD); + CLIB_PREFETCH (b[7]->data, sizeof (ip4_header_t), LOAD); + } - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (b[2])->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (b[3])->ip.adj_index[VLIB_RX] = ~0; - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX]; + sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX]; + sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX]; - if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address))) - { - arc0 = lm->mcast_feature_arc_index; - next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; - } - else - { - arc0 = lm->ucast_feature_arc_index; - next0 = IP4_INPUT_NEXT_LOOKUP; - } + x |= sw_if_index[0] ^ last_sw_if_index; + x |= sw_if_index[1] ^ last_sw_if_index; + x |= sw_if_index[2] ^ last_sw_if_index; + x |= sw_if_index[3] ^ last_sw_if_index; - if (PREDICT_FALSE (ip4_address_is_multicast (&ip1->dst_address))) + if (PREDICT_TRUE (x == 0)) + { + /* we deal with 4 more packets sharing the same sw_if_index + with the previous one, so we can optimize */ + cnt += 4; + if (arc_enabled) { - arc1 = lm->mcast_feature_arc_index; - next1 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; + next[0] = ip4_input_set_next (sw_if_index[0], b[0], 1); + next[1] = ip4_input_set_next (sw_if_index[1], b[1], 1); + next[2] = ip4_input_set_next (sw_if_index[2], b[2], 1); + next[3] = ip4_input_set_next (sw_if_index[3], b[3], 1); } else { - arc1 = lm->ucast_feature_arc_index; - next1 = IP4_INPUT_NEXT_LOOKUP; + next[0] = ip4_input_set_next (sw_if_index[0], b[0], 0); + next[1] = ip4_input_set_next (sw_if_index[1], b[1], 0); + next[2] = ip4_input_set_next (sw_if_index[2], b[2], 0); + next[3] = ip4_input_set_next (sw_if_index[3], b[3], 0); } - - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; - vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0; - - vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); - vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); - - vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); - ip4_input_check_x2 (vm, error_node, - p0, p1, ip0, ip1, - &next0, &next1, verify_checksum); - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - pi0, pi1, next0, next1); } - while (n_left_from > 0 && n_left_to_next > 0) + else { - vlib_buffer_t *p0; - ip4_header_t *ip0; - u32 sw_if_index0, pi0, next0; - u8 arc0; - - pi0 = from[0]; - to_next[0] = pi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip0 = vlib_buffer_get_current (p0); - - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - - if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address))) - { - arc0 = lm->mcast_feature_arc_index; - next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; - } - else - { - arc0 = lm->ucast_feature_arc_index; - next0 = IP4_INPUT_NEXT_LOOKUP; - } - - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; - vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); + ip4_input_check_sw_if_index (cm, sw_if_index[0], &last_sw_if_index, + &cnt, &arc_enabled); + ip4_input_check_sw_if_index (cm, sw_if_index[1], &last_sw_if_index, + &cnt, &arc_enabled); + ip4_input_check_sw_if_index (cm, sw_if_index[2], &last_sw_if_index, + &cnt, &arc_enabled); + ip4_input_check_sw_if_index (cm, sw_if_index[3], &last_sw_if_index, + &cnt, &arc_enabled); + + next[0] = ip4_input_set_next (sw_if_index[0], b[0], 1); + next[1] = ip4_input_set_next (sw_if_index[1], b[1], 1); + next[2] = ip4_input_set_next (sw_if_index[2], b[2], 1); + next[3] = ip4_input_set_next (sw_if_index[3], b[3], 1); + } - vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); - ip4_input_check_x1 (vm, error_node, p0, ip0, &next0, - verify_checksum); + ip[0] = vlib_buffer_get_current (b[0]); + ip[1] = vlib_buffer_get_current (b[1]); + ip[2] = vlib_buffer_get_current (b[2]); + ip[3] = vlib_buffer_get_current (b[3]); - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - pi0, next0); - } + ip4_input_check_x4 (vm, error_node, b, ip, next, verify_checksum); - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + /* next */ + b += 4; + next += 4; + n_left_from -= 4; + } + while (n_left_from) + { + u32 next0; + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0; + sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + ip4_input_check_sw_if_index (cm, sw_if_index[0], &last_sw_if_index, + &cnt, &arc_enabled); + next0 = ip4_input_set_next (sw_if_index[0], b[0], arc_enabled); + ip[0] = vlib_buffer_get_current (b[0]); + ip4_input_check_x1 (vm, error_node, b[0], ip[0], &next0, + verify_checksum); + next[0] = next0; + + /* next */ + b += 1; + next += 1; + n_left_from -= 1; } + vlib_increment_simple_counter (cm, thread_index, last_sw_if_index, cnt); + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; } @@ -254,19 +292,22 @@ ip4_input_inline (vlib_main_t * vm, <code> vnet_get_config_data (... &next0 ...); </code> or @c error-drop */ -static uword -ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +uword CLIB_CPU_OPTIMIZED +CLIB_MULTIARCH_FN (ip4_input) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1); } -static uword -ip4_input_no_checksum (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +uword CLIB_CPU_OPTIMIZED +CLIB_MULTIARCH_FN (ip4_input_no_checksum) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0); } +#ifndef CLIB_MULTIARCH_VARIANT char *ip4_error_strings[] = { #define _(sym,string) string, foreach_ip4_error @@ -295,11 +336,7 @@ VLIB_REGISTER_NODE (ip4_input_node) = { .format_buffer = format_ip4_header, .format_trace = format_ip4_input_trace, }; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input); -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { .function = ip4_input_no_checksum, .name = "ip4-input-no-checksum", @@ -320,8 +357,25 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node, - ip4_input_no_checksum); +#if __x86_64__ +vlib_node_function_t __clib_weak ip4_input_avx512; +vlib_node_function_t __clib_weak ip4_input_avx2; +vlib_node_function_t __clib_weak ip4_input_no_checksum_avx512; +vlib_node_function_t __clib_weak ip4_input_no_checksum_avx2; +static void __clib_constructor +ip4_input_multiarch_select (void) +{ + if (ip4_input_no_checksum_avx512 && clib_cpu_supports_avx512f ()) + ip4_input_no_checksum_node.function = ip4_input_no_checksum_avx512; + else if (ip4_input_no_checksum_avx2 && clib_cpu_supports_avx2 ()) + ip4_input_no_checksum_node.function = ip4_input_no_checksum_avx2; + + if (ip4_input_avx512 && clib_cpu_supports_avx512f ()) + ip4_input_node.function = ip4_input_avx512; + else if (ip4_input_avx2 && clib_cpu_supports_avx2 ()) + ip4_input_node.function = ip4_input_avx2; +} +#endif static clib_error_t * ip4_init (vlib_main_t * vm) @@ -360,6 +414,7 @@ ip4_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (ip4_init); +#endif /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h index 600d6937dd8..e0873039990 100644 --- a/src/vnet/ip/ip4_input.h +++ b/src/vnet/ip/ip4_input.h @@ -57,6 +57,162 @@ typedef enum } ip4_input_next_t; always_inline void +ip4_input_check_x4 (vlib_main_t * vm, + vlib_node_runtime_t * error_node, + vlib_buffer_t ** p, ip4_header_t ** ip, + u16 * next, int verify_checksum) +{ + u8 error0, error1, error2, error3; + u32 ip_len0, cur_len0; + u32 ip_len1, cur_len1; + u32 ip_len2, cur_len2; + u32 ip_len3, cur_len3; + i32 len_diff0, len_diff1, len_diff2, len_diff3; + + error0 = error1 = error2 = error3 = IP4_ERROR_NONE; + + /* Punt packets with options or wrong version. */ + if (PREDICT_FALSE (ip[0]->ip_version_and_header_length != 0x45)) + error0 = (ip[0]->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + if (PREDICT_FALSE (ip[1]->ip_version_and_header_length != 0x45)) + error1 = (ip[1]->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + if (PREDICT_FALSE (ip[2]->ip_version_and_header_length != 0x45)) + error2 = (ip[2]->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + if (PREDICT_FALSE (ip[3]->ip_version_and_header_length != 0x45)) + error3 = (ip[3]->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + if (PREDICT_FALSE (ip[0]->ttl < 1)) + error0 = IP4_ERROR_TIME_EXPIRED; + if (PREDICT_FALSE (ip[1]->ttl < 1)) + error1 = IP4_ERROR_TIME_EXPIRED; + if (PREDICT_FALSE (ip[2]->ttl < 1)) + error2 = IP4_ERROR_TIME_EXPIRED; + if (PREDICT_FALSE (ip[3]->ttl < 1)) + error3 = IP4_ERROR_TIME_EXPIRED; + + /* Verify header checksum. */ + if (verify_checksum) + { + ip_csum_t sum0, sum1, sum2, sum3; + + ip4_partial_header_checksum_x1 (ip[0], sum0); + ip4_partial_header_checksum_x1 (ip[1], sum1); + ip4_partial_header_checksum_x1 (ip[2], sum2); + ip4_partial_header_checksum_x1 (ip[3], sum3); + + error0 = 0xffff != ip_csum_fold (sum0) ? + IP4_ERROR_BAD_CHECKSUM : error0; + error1 = 0xffff != ip_csum_fold (sum1) ? + IP4_ERROR_BAD_CHECKSUM : error1; + error2 = 0xffff != ip_csum_fold (sum2) ? + IP4_ERROR_BAD_CHECKSUM : error2; + error3 = 0xffff != ip_csum_fold (sum3) ? + IP4_ERROR_BAD_CHECKSUM : error3; + } + + /* Drop fragmentation offset 1 packets. */ + error0 = ip4_get_fragment_offset (ip[0]) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; + error1 = ip4_get_fragment_offset (ip[1]) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error1; + error2 = ip4_get_fragment_offset (ip[2]) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error2; + error3 = ip4_get_fragment_offset (ip[3]) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error3; + + /* Verify lengths. */ + ip_len0 = clib_net_to_host_u16 (ip[0]->length); + ip_len1 = clib_net_to_host_u16 (ip[1]->length); + ip_len2 = clib_net_to_host_u16 (ip[2]->length); + ip_len3 = clib_net_to_host_u16 (ip[3]->length); + + /* IP length must be at least minimal IP header. */ + error0 = ip_len0 < sizeof (ip[0][0]) ? IP4_ERROR_TOO_SHORT : error0; + error1 = ip_len1 < sizeof (ip[1][0]) ? IP4_ERROR_TOO_SHORT : error1; + error2 = ip_len2 < sizeof (ip[2][0]) ? IP4_ERROR_TOO_SHORT : error2; + error3 = ip_len3 < sizeof (ip[3][0]) ? IP4_ERROR_TOO_SHORT : error3; + + cur_len0 = vlib_buffer_length_in_chain (vm, p[0]); + cur_len1 = vlib_buffer_length_in_chain (vm, p[1]); + cur_len2 = vlib_buffer_length_in_chain (vm, p[2]); + cur_len3 = vlib_buffer_length_in_chain (vm, p[3]); + + len_diff0 = cur_len0 - ip_len0; + len_diff1 = cur_len1 - ip_len1; + len_diff2 = cur_len2 - ip_len2; + len_diff3 = cur_len3 - ip_len3; + + error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0; + error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1; + error2 = len_diff2 < 0 ? IP4_ERROR_BAD_LENGTH : error2; + error3 = len_diff3 < 0 ? IP4_ERROR_BAD_LENGTH : error3; + + if (PREDICT_FALSE (error0 != IP4_ERROR_NONE)) + { + if (error0 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p[0], ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[0] = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next[0] = error0 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + p[0]->error = error_node->errors[error0]; + } + if (PREDICT_FALSE (error1 != IP4_ERROR_NONE)) + { + if (error1 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p[1], ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[1] = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next[1] = error1 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + p[1]->error = error_node->errors[error1]; + } + if (PREDICT_FALSE (error2 != IP4_ERROR_NONE)) + { + if (error2 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p[2], ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[2] = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next[2] = error2 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + p[2]->error = error_node->errors[error2]; + } + if (PREDICT_FALSE (error3 != IP4_ERROR_NONE)) + { + if (error3 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p[3], ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[3] = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next[3] = error3 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + p[3]->error = error_node->errors[error3]; + } +} + +always_inline void ip4_input_check_x2 (vlib_main_t * vm, vlib_node_runtime_t * error_node, vlib_buffer_t * p0, vlib_buffer_t * p1, |