From daa2cd1512ac46dfe116b89a34caf19a71994c45 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 22 Nov 2016 21:10:25 -0800 Subject: l2: quad loop l2fwd node Change-Id: I8510575ee1d6b7c5ccf551766e77e40ce55e644c Signed-off-by: Damjan Marion --- vnet/vnet/l2/l2_fib.h | 77 +++++++++++++++++++++++++++++++++++ vnet/vnet/l2/l2_fwd.c | 109 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 158 insertions(+), 28 deletions(-) diff --git a/vnet/vnet/l2/l2_fib.h b/vnet/vnet/l2/l2_fib.h index d1b8534dfde..8e7a52e4d02 100644 --- a/vnet/vnet/l2/l2_fib.h +++ b/vnet/vnet/l2/l2_fib.h @@ -233,6 +233,83 @@ l2fib_lookup_2 (BVT (clib_bihash) * mac_table, } } +static_always_inline void +l2fib_lookup_4 (BVT (clib_bihash) * mac_table, + l2fib_entry_key_t * cached_key, + l2fib_entry_result_t * cached_result, + u8 * mac0, + u8 * mac1, + u8 * mac2, + u8 * mac3, + u16 bd_index0, + u16 bd_index1, + u16 bd_index2, + u16 bd_index3, + l2fib_entry_key_t * key0, + l2fib_entry_key_t * key1, + l2fib_entry_key_t * key2, + l2fib_entry_key_t * key3, + u32 * bucket0, + u32 * bucket1, + u32 * bucket2, + u32 * bucket3, + l2fib_entry_result_t * result0, + l2fib_entry_result_t * result1, + l2fib_entry_result_t * result2, + l2fib_entry_result_t * result3) +{ + /* set up key */ + key0->raw = l2fib_make_key (mac0, bd_index0); + key1->raw = l2fib_make_key (mac1, bd_index1); + key2->raw = l2fib_make_key (mac2, bd_index2); + key3->raw = l2fib_make_key (mac3, bd_index3); + + if ((key0->raw == cached_key->raw) && (key1->raw == cached_key->raw) && + (key2->raw == cached_key->raw) && (key3->raw == cached_key->raw)) + { + /* Both hit in the one-entry cache */ + result0->raw = cached_result->raw; + result1->raw = cached_result->raw; + result2->raw = cached_result->raw; + result3->raw = cached_result->raw; + *bucket0 = ~0; + *bucket1 = ~0; + *bucket2 = ~0; + *bucket3 = ~0; + + } + else + { + BVT (clib_bihash_kv) kv0, kv1, kv2, kv3; + + /* + * Do a regular mac table lookup + * Interleave lookups for packet 0 and packet 1 + */ + kv0.key = key0->raw; + kv1.key = key1->raw; + kv2.key = key2->raw; + kv3.key = key3->raw; + kv0.value = ~0ULL; + kv1.value = ~0ULL; + kv2.value = ~0ULL; + kv3.value = ~0ULL; + + BV (clib_bihash_search_inline) (mac_table, &kv0); + BV (clib_bihash_search_inline) (mac_table, &kv1); + BV (clib_bihash_search_inline) (mac_table, &kv2); + BV (clib_bihash_search_inline) (mac_table, &kv3); + + result0->raw = kv0.value; + result1->raw = kv1.value; + result2->raw = kv2.value; + result3->raw = kv3.value; + + /* Update one-entry cache */ + cached_key->raw = key1->raw; + cached_result->raw = result1->raw; + } +} BVT (clib_bihash) * get_mac_table (void); void diff --git a/vnet/vnet/l2/l2_fwd.c b/vnet/vnet/l2/l2_fwd.c index 586aa3d4206..710a9d9e8c3 100644 --- a/vnet/vnet/l2/l2_fwd.c +++ b/vnet/vnet/l2/l2_fwd.c @@ -223,49 +223,63 @@ l2fwd_node_fn (vlib_main_t * vm, /* get space to enqueue frame to graph node "next_index" */ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) + while (n_left_from >= 8 && n_left_to_next >= 4) { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; - ethernet_header_t *h0, *h1; - l2fib_entry_key_t key0, key1; - l2fib_entry_result_t result0, result1; - u32 bucket0, bucket1; + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + ethernet_header_t *h0, *h1, *h2, *h3; + l2fib_entry_key_t key0, key1, key2, key3; + l2fib_entry_result_t result0, result1, result2, result3; + u32 bucket0, bucket1, bucket2, bucket3; /* Prefetch next iteration. */ { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); } /* speculatively enqueue b0 and b1 to the current next frame */ /* bi is "buffer index", b is pointer to the buffer */ to_next[0] = bi0 = from[0]; to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); /* RX interface handles */ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX]; h0 = vlib_buffer_get_current (b0); h1 = vlib_buffer_get_current (b1); + h2 = vlib_buffer_get_current (b2); + h3 = vlib_buffer_get_current (b3); if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) { @@ -287,27 +301,66 @@ l2fwd_node_fn (vlib_main_t * vm, clib_memcpy (t->src, h1->src_address, 6); clib_memcpy (t->dst, h1->dst_address, 6); } + if (b2->flags & VLIB_BUFFER_IS_TRACED) + { + l2fwd_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->sw_if_index = sw_if_index2; + t->bd_index = vnet_buffer (b2)->l2.bd_index; + clib_memcpy (t->src, h2->src_address, 6); + clib_memcpy (t->dst, h2->dst_address, 6); + } + if (b3->flags & VLIB_BUFFER_IS_TRACED) + { + l2fwd_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->sw_if_index = sw_if_index3; + t->bd_index = vnet_buffer (b3)->l2.bd_index; + clib_memcpy (t->src, h3->src_address, 6); + clib_memcpy (t->dst, h3->dst_address, 6); + } } /* process 2 pkts */ #ifdef COUNTERS - em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 2; + em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4; #endif - l2fib_lookup_2 (msm->mac_table, &cached_key, &cached_result, h0->dst_address, h1->dst_address, vnet_buffer (b0)->l2.bd_index, vnet_buffer (b1)->l2.bd_index, &key0, /* not used */ + /* *INDENT-OFF* */ + l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result, + h0->dst_address, h1->dst_address, + h2->dst_address, h3->dst_address, + vnet_buffer (b0)->l2.bd_index, + vnet_buffer (b1)->l2.bd_index, + vnet_buffer (b2)->l2.bd_index, + vnet_buffer (b3)->l2.bd_index, + &key0, /* not used */ &key1, /* not used */ + &key2, /* not used */ + &key3, /* not used */ &bucket0, /* not used */ &bucket1, /* not used */ - &result0, &result1); + &bucket2, /* not used */ + &bucket3, /* not used */ + &result0, + &result1, + &result2, + &result3); + /* *INDENT-ON* */ l2fwd_process (vm, node, msm, em, b0, sw_if_index0, &result0, &next0); l2fwd_process (vm, node, msm, em, b1, sw_if_index1, &result1, &next1); + l2fwd_process (vm, node, msm, em, b2, sw_if_index2, &result2, + &next2); + l2fwd_process (vm, node, msm, em, b3, sw_if_index3, &result3, + &next3); /* verify speculative enqueues, maybe switch current next frame */ /* if next0==next1==next_index then nothing special needs to be done */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) -- cgit 1.2.3-korg