diff options
Diffstat (limited to 'src/vnet/ethernet/node.c')
-rwxr-xr-x | src/vnet/ethernet/node.c | 539 |
1 files changed, 517 insertions, 22 deletions
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index 53d5b4eb02d..c39c6d7e0c7 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Cisco and/or its affiliates. + * Copyright (c) 2018 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -49,7 +49,9 @@ #define foreach_ethernet_input_next \ _ (PUNT, "error-punt") \ _ (DROP, "error-drop") \ - _ (LLC, "llc-input") + _ (LLC, "llc-input") \ + _ (IP4_INPUT, "ip4-input") \ + _ (IP4_INPUT_NCS, "ip4-input-no-checksum") typedef enum { @@ -62,6 +64,8 @@ typedef enum typedef struct { u8 packet_data[32]; + u16 frame_flags; + ethernet_input_frame_t frame_data; } ethernet_input_trace_t; static u8 * @@ -70,7 +74,16 @@ format_ethernet_input_trace (u8 * s, va_list * va) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *); + u32 indent = format_get_indent (s); + if (t->frame_flags) + { + s = format (s, "frame: flags 0x%x", t->frame_flags); + if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) + s = format (s, ", hw-if-index %u, sw-if-index %u", + t->frame_data.hw_if_index, t->frame_data.sw_if_index); + s = format (s, "\n%U", format_white_space, indent); + } s = format (s, "%U", format_ethernet_header, t->packet_data); return s; @@ -289,16 +302,401 @@ determine_next_node (ethernet_main_t * em, } } -static_always_inline uword +typedef enum +{ + ETYPE_ID_UNKNOWN = 0, + ETYPE_ID_IP4, + ETYPE_ID_IP6, + ETYPE_ID_MPLS, + ETYPE_N_IDS, +} etype_id_t; + +static_always_inline void +eth_input_advance_and_flags (vlib_main_t * vm, u32 * from, u32 n_left, + i16 advance, u32 and_flags, u32 or_flags) +{ + vlib_buffer_t *b[8]; + while (n_left >= 8) + { + vlib_get_buffers (vm, from, b, 8); + vlib_buffer_advance (b[0], advance); + vlib_buffer_advance (b[1], advance); + vlib_buffer_advance (b[2], advance); + vlib_buffer_advance (b[3], advance); + vlib_buffer_advance (b[4], advance); + vlib_buffer_advance (b[5], advance); + vlib_buffer_advance (b[6], advance); + vlib_buffer_advance (b[7], advance); + b[0]->flags = (b[0]->flags & and_flags) | or_flags; + b[1]->flags = (b[1]->flags & and_flags) | or_flags; + b[2]->flags = (b[2]->flags & and_flags) | or_flags; + b[3]->flags = (b[3]->flags & and_flags) | or_flags; + b[4]->flags = (b[4]->flags & and_flags) | or_flags; + b[5]->flags = (b[5]->flags & and_flags) | or_flags; + b[6]->flags = (b[6]->flags & and_flags) | or_flags; + b[7]->flags = (b[7]->flags & and_flags) | or_flags; + + n_left -= 8; + from += 8; + } + while (n_left) + { + vlib_get_buffers (vm, from, b, 1); + vlib_buffer_advance (b[0], advance); + b[0]->flags = (b[0]->flags & and_flags) | or_flags; + + n_left -= 1; + from += 1; + } +} + +typedef struct +{ + u16 etypes[VLIB_FRAME_SIZE]; + u32 bufs_by_etype[ETYPE_N_IDS][VLIB_FRAME_SIZE]; + u16 n_bufs_by_etype[ETYPE_N_IDS]; +} eth_input_data_t; + +/* following vector code relies on following assumptions */ +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0); +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2); +STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4); +STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) == + STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2, + "l3_hdr_offset must follow l2_hdr_offset"); + +static_always_inline void +eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, i16 adv, u32 flags, int is_l3) +{ +#ifdef CLIB_HAVE_VEC256 + /* to reduce number of small loads/stores we are loading first 64 bits + of each buffer metadata into 256-bit register so we can advance + current_data, current_length and flags. + Observed saving of this code is ~2 clocks per packet */ + u64x4 r, radv; + + /* vector if signed 16 bit integers used in signed vector add operation + to advnce current_data and current_length */ + u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags }; + i16x16 adv4 = { + adv, -adv, 0, 0, adv, -adv, 0, 0, + adv, -adv, 0, 0, adv, -adv, 0, 0 + }; + + /* load 4 x 64 bits */ + r = u64x4_gather (b[0], b[1], b[2], b[3]); + + /* set flags */ + r |= (u64x4) flags4; + + /* advance buffer */ + radv = (u64x4) ((i16x16) r + adv4); + + /* write 4 x 64 bits */ + u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]); + + /* use old current_data as l2_hdr_offset and new current_data as + l3_hdr_offset */ + r = (u64x4) u16x16_blend (r, radv << 16, 0xaa); + + /* store both l2_hdr_offset and l3_hdr_offset in single store operation */ + u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset); + u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset); + + ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset); + ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset); + ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset); + ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset); + + ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv); + ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv); + ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv); + ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv); + +#else + vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data; + vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data; + vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data; + vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data; + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv; + vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv; + vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv; + vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv; + + if (is_l3) + { + vlib_buffer_advance (b[0], adv); + vlib_buffer_advance (b[1], adv); + vlib_buffer_advance (b[2], adv); + vlib_buffer_advance (b[3], adv); + } + + b[0]->flags |= flags; + b[1]->flags |= flags; + b[2]->flags |= flags; + b[3]->flags |= flags; +#endif + + if (!is_l3) + { + vnet_buffer (b[0])->l2.l2_len = adv; + vnet_buffer (b[1])->l2.l2_len = adv; + vnet_buffer (b[2])->l2.l2_len = adv; + vnet_buffer (b[3])->l2.l2_len = adv; + } +} + +static_always_inline void +eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, i16 adv, u32 flags, int is_l3) +{ + vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data; + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv; + + if (is_l3) + vlib_buffer_advance (b[0], adv); + b[0]->flags |= flags; + if (!is_l3) + vnet_buffer (b[0])->l2.l2_len = adv; +} + +static_always_inline void +eth_input_process_frame (vlib_main_t * vm, u32 * from, u16 * etype, + u32 n_left, int is_l3) +{ + vlib_buffer_t *b[16]; + ethernet_header_t *e; + int adv = sizeof (ethernet_header_t); + + u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + + while (n_left >= 16) + { + vlib_buffer_t **ph = b + 12, **pd = b + 8; + vlib_get_buffers (vm, from, b, 4); + vlib_get_buffers (vm, from + 8, b + 8, 8); + + vlib_prefetch_buffer_header (ph[0], LOAD); + vlib_prefetch_buffer_data (pd[0], LOAD); + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + vlib_prefetch_buffer_header (ph[1], LOAD); + vlib_prefetch_buffer_data (pd[1], LOAD); + e = vlib_buffer_get_current (b[1]); + etype[1] = e->type; + + vlib_prefetch_buffer_header (ph[2], LOAD); + vlib_prefetch_buffer_data (pd[2], LOAD); + e = vlib_buffer_get_current (b[2]); + etype[2] = e->type; + + vlib_prefetch_buffer_header (ph[3], LOAD); + vlib_prefetch_buffer_data (pd[3], LOAD); + e = vlib_buffer_get_current (b[3]); + etype[3] = e->type; + + eth_input_adv_and_flags_x4 (b, adv, flags, is_l3); + + /* next */ + n_left -= 4; + etype += 4; + from += 4; + } + while (n_left >= 4) + { + vlib_get_buffers (vm, from, b, 4); + + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + e = vlib_buffer_get_current (b[1]); + etype[1] = e->type; + + e = vlib_buffer_get_current (b[2]); + etype[2] = e->type; + + e = vlib_buffer_get_current (b[3]); + etype[3] = e->type; + + eth_input_adv_and_flags_x4 (b, adv, flags, is_l3); + + /* next */ + n_left -= 4; + etype += 4; + from += 4; + } + while (n_left) + { + vlib_get_buffers (vm, from, b, 1); + + e = vlib_buffer_get_current (b[0]); + etype[0] = e->type; + + eth_input_adv_and_flags_x1 (b, adv, flags, is_l3); + + /* next */ + n_left -= 1; + etype += 1; + from += 1; + } +} + +static_always_inline void +eth_input_sort (vlib_main_t * vm, u32 * from, u32 n_packets, + eth_input_data_t * d) +{ + u16 *etype = d->etypes; + i32 n_left = n_packets; + +#if defined (CLIB_HAVE_VEC256) + u16x16 e16; + u16x16 et16_ip4 = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); + u16x16 et16_ip6 = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); + u16x16 et16_mpls = u16x16_splat (clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); + u16x16 id16_ip4 = u16x16_splat (ETYPE_ID_IP4); + u16x16 id16_ip6 = u16x16_splat (ETYPE_ID_IP6); + u16x16 id16_mpls = u16x16_splat (ETYPE_ID_MPLS); + + while (n_left > 0) + { + u16x16 r = { 0 }; + e16 = u16x16_load_unaligned (etype); + r += (e16 == et16_ip4) & id16_ip4; + r += (e16 == et16_ip6) & id16_ip6; + r += (e16 == et16_mpls) & id16_mpls; + u16x16_store_unaligned (r, etype); + etype += 16; + n_left -= 16; + } +#elif defined (CLIB_HAVE_VEC128) + u16x8 e8; + u16x8 et8_ip4 = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); + u16x8 et8_ip6 = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); + u16x8 et8_mpls = u16x8_splat (clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); + u16x8 id8_ip4 = u16x8_splat (ETYPE_ID_IP4); + u16x8 id8_ip6 = u16x8_splat (ETYPE_ID_IP6); + u16x8 id8_mpls = u16x8_splat (ETYPE_ID_MPLS); + + while (n_left > 0) + { + u16x8 r = { 0 }; + e8 = u16x8_load_unaligned (etype); + r += (e8 == et8_ip4) & id8_ip4; + r += (e8 == et8_ip6) & id8_ip6; + r += (e8 == et8_mpls) & id8_mpls; + u16x8_store_unaligned (r, etype); + etype += 8; + n_left -= 8; + } +#else + while (n_left) + { + if (etype[0] == ETHERNET_TYPE_IP4) + etype[0] = ETYPE_ID_IP4; + else if (etype[0] == ETHERNET_TYPE_IP6) + etype[0] = ETYPE_ID_IP6; + else if (etype[0] == ETHERNET_TYPE_MPLS) + etype[0] = ETYPE_ID_MPLS; + else + etype[0] = ETYPE_ID_UNKNOWN; + + etype += 1; + n_left -= 1; + } +#endif + + etype = d->etypes; + n_left = n_packets; + + clib_memset_u16 (d->n_bufs_by_etype, 0, ETYPE_N_IDS); + while (n_left) + { + u16 x, y; + x = etype[0]; + y = d->n_bufs_by_etype[x]; + +#ifdef CLIB_HAVE_VEC256 + if (n_left >= 16 && u16x16_is_all_equal (u16x16_load_unaligned (etype), + etype[0])) + { + clib_memcpy_fast (&d->bufs_by_etype[x][y], from, 16 * sizeof (u32)); + d->n_bufs_by_etype[x] += 16; + + /* next */ + n_left -= 16; + etype += 16; + from += 16; + continue; + } +#endif +#ifdef CLIB_HAVE_VEC128 + if (n_left >= 8 && u16x8_is_all_equal (u16x8_load_unaligned (etype), + etype[0])) + { + clib_memcpy_fast (&d->bufs_by_etype[x][y], from, 8 * sizeof (u32)); + d->n_bufs_by_etype[x] += 8; + + /* next */ + n_left -= 8; + etype += 8; + from += 8; + continue; + } +#endif + d->bufs_by_etype[x][y] = from[0]; + d->n_bufs_by_etype[x]++; + + /* next */ + n_left -= 1; + etype += 1; + from += 1; + } +} + +static_always_inline void +ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 *from, n_left; + if ((node->flags & VLIB_NODE_FLAG_TRACE) == 0) + return; + + from = vlib_frame_vector_args (from_frame); + n_left = from_frame->n_vectors; + + while (n_left) + { + ethernet_input_trace_t *t0; + vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (ethernet_input_trace_t)); + clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data, + sizeof (t0->packet_data)); + t0->frame_flags = from_frame->flags; + clib_memcpy_fast (&t0->frame_data, + vlib_frame_scalar_args (from_frame), + sizeof (ethernet_input_frame_t)); + } + from += 1; + n_left -= 1; + } +} + +static_always_inline void ethernet_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame, + u32 * from, u32 n_packets, ethernet_input_variant_t variant) { vnet_main_t *vnm = vnet_get_main (); ethernet_main_t *em = ðernet_main; vlib_node_runtime_t *error_node; - u32 n_left_from, next_index, *from, *to_next; + u32 n_left_from, next_index, *to_next; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u32 thread_index = vm->thread_index; u32 cached_sw_if_index = ~0; @@ -310,15 +708,7 @@ ethernet_input_inline (vlib_main_t * vm, else error_node = node; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - vlib_trace_frame_buffers_only (vm, node, - from, - n_left_from, - sizeof (from[0]), - sizeof (ethernet_input_trace_t)); + n_left_from = n_packets; next_index = node->cached_next_index; stats_sw_if_index = node->runtime_data[0]; @@ -764,32 +1154,136 @@ ethernet_input_inline (vlib_main_t * vm, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } +} - return from_frame->n_vectors; +static_always_inline void +eth_input_enqueue_untagged (vlib_main_t * vm, vlib_node_runtime_t * node, + eth_input_data_t * d, int ip4_cksum_ok, int is_l3) +{ + ethernet_main_t *em = ðernet_main; + etype_id_t id; + u32 next_index; + + id = ETYPE_ID_IP4; + if (d->n_bufs_by_etype[id]) + { + if (is_l3) + { + next_index = em->l3_next.input_next_ip4; + if (next_index == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok) + next_index = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS; + } + else + next_index = em->l2_next; + + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } + + id = ETYPE_ID_IP6; + if (d->n_bufs_by_etype[id]) + { + next_index = is_l3 ? em->l3_next.input_next_ip6 : em->l2_next; + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } + + id = ETYPE_ID_MPLS; + if (d->n_bufs_by_etype[id]) + { + next_index = is_l3 ? em->l3_next.input_next_mpls : em->l2_next; + vlib_buffer_enqueue_to_single_next (vm, node, d->bufs_by_etype[id], + next_index, d->n_bufs_by_etype[id]); + } + + id = ETYPE_ID_UNKNOWN; + if (d->n_bufs_by_etype[id]) + { + /* in case of l3 interfaces, we already advanced buffer so we need to + roll back */ + if (is_l3) + eth_input_advance_and_flags (vm, d->bufs_by_etype[id], + d->n_bufs_by_etype[id], + -(i16) sizeof (ethernet_header_t), + ~VNET_BUFFER_F_L3_HDR_OFFSET_VALID, 0); + ethernet_input_inline (vm, node, d->bufs_by_etype[id], + d->n_bufs_by_etype[id], + ETHERNET_INPUT_VARIANT_ETHERNET); + } } VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) + vlib_frame_t * frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_ETHERNET); + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + u32 *from = vlib_frame_vector_args (frame); + u32 n_packets = frame->n_vectors; + + ethernet_input_trace (vm, node, frame); + + if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX) + { + eth_input_data_t data, *d = &data; + ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index); + main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + subint_config_t *subint0 = &intf0->untagged_subint; + int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0; + + if (subint0->flags & SUBINT_CONFIG_L2) + { + /* untagged packets are treated as L2 */ + eth_input_process_frame (vm, from, d->etypes, n_packets, 0); + eth_input_sort (vm, from, n_packets, d); + eth_input_enqueue_untagged (vm, node, d, ip4_cksum_ok, 0); + } + else + { + ethernet_interface_t *ei; + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + + /* currently only slowpath deals with dmac check */ + if (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + goto slowpath; + + /* untagged packets are treated as L3 */ + eth_input_process_frame (vm, from, d->etypes, n_packets, 1); + eth_input_sort (vm, from, n_packets, d); + eth_input_enqueue_untagged (vm, node, d, ip4_cksum_ok, 1); + } + return n_packets; + } + +slowpath: + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_ETHERNET); + return n_packets; } VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); + u32 *from = vlib_frame_vector_args (from_frame); + u32 n_packets = from_frame->n_vectors; + ethernet_input_trace (vm, node, from_frame); + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); + return n_packets; } VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { - return ethernet_input_inline (vm, node, from_frame, - ETHERNET_INPUT_VARIANT_NOT_L2); + u32 *from = vlib_frame_vector_args (from_frame); + u32 n_packets = from_frame->n_vectors; + ethernet_input_trace (vm, node, from_frame); + ethernet_input_inline (vm, node, from, n_packets, + ETHERNET_INPUT_VARIANT_NOT_L2); + return n_packets; } @@ -1159,6 +1653,7 @@ VLIB_REGISTER_NODE (ethernet_input_node) = { .name = "ethernet-input", /* Takes a vector of packets. */ .vector_size = sizeof (u32), + .scalar_size = sizeof (ethernet_input_frame_t), .n_errors = ETHERNET_N_ERROR, .error_strings = ethernet_error_strings, .n_next_nodes = ETHERNET_INPUT_N_NEXT, |