From 696e88da9799056036f329676213f3c0c0a1db9c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Thu, 16 Mar 2017 07:34:55 -0400 Subject: MPLS performance improvments. 1 - Quad loop lookup and label imposition. 2 - optimise imposition for the 1 label case 3 - input gets TTL from header directly (no byte swap) Change-Id: I59204c9e5d134b0df75d7afa43e360f946d1ffe7 Signed-off-by: Neale Ranns --- src/vnet/mpls/mpls_lookup.c | 116 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 12 deletions(-) (limited to 'src/vnet/mpls/mpls_lookup.c') diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index 2d34cbde..475bb204 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -80,7 +80,7 @@ mpls_lookup (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) + while (n_left_from >= 8 && n_left_to_next >= 4) { u32 lbi0, next0, lfib_index0, bi0, hash_c0; const mpls_unicast_header_t * h0; @@ -92,46 +92,79 @@ mpls_lookup (vlib_main_t * vm, const load_balance_t *lb1; const dpo_id_t *dpo1; vlib_buffer_t * b1; + u32 lbi2, next2, lfib_index2, bi2, hash_c2; + const mpls_unicast_header_t * h2; + const load_balance_t *lb2; + const dpo_id_t *dpo2; + vlib_buffer_t * b2; + u32 lbi3, next3, lfib_index3, bi3, hash_c3; + const mpls_unicast_header_t * h3; + const load_balance_t *lb3; + const dpo_id_t *dpo3; + vlib_buffer_t * b3; /* Prefetch next iteration. */ { - vlib_buffer_t * p2, * p3; + vlib_buffer_t * p2, * p3, *p4, *p5; p2 = vlib_get_buffer (vm, from[2]); p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); vlib_prefetch_buffer_header (p2, STORE); vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE); } bi0 = to_next[0] = from[0]; bi1 = to_next[1] = from[1]; + bi2 = to_next[2] = from[2]; + bi3 = to_next[3] = from[3]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; + from += 4; + n_left_from -= 4; + to_next += 4; + n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); h0 = vlib_buffer_get_current (b0); h1 = vlib_buffer_get_current (b1); + h2 = vlib_buffer_get_current (b2); + h3 = vlib_buffer_get_current (b3); lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, vnet_buffer(b0)->sw_if_index[VLIB_RX]); lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index, vnet_buffer(b1)->sw_if_index[VLIB_RX]); + lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b2)->sw_if_index[VLIB_RX]); + lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b3)->sw_if_index[VLIB_RX]); lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1); + lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); + lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); + lb0 = load_balance_get(lbi0); lb1 = load_balance_get(lbi1); + lb2 = load_balance_get(lbi2); + lb3 = load_balance_get(lbi3); hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; + hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0; + hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { @@ -143,11 +176,25 @@ mpls_lookup (vlib_main_t * vm, hash_c1 = vnet_buffer (b1)->ip.flow_hash = mpls_compute_flow_hash(h1, lb1->lb_hash_config); } + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + } + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + } ASSERT (lb0->lb_n_buckets > 0); ASSERT (is_pow2 (lb0->lb_n_buckets)); ASSERT (lb1->lb_n_buckets > 0); ASSERT (is_pow2 (lb1->lb_n_buckets)); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); dpo0 = load_balance_get_bucket_i(lb0, (hash_c0 & @@ -155,12 +202,22 @@ mpls_lookup (vlib_main_t * vm, dpo1 = load_balance_get_bucket_i(lb1, (hash_c1 & (lb1->lb_n_buckets_minus_1))); + dpo2 = load_balance_get_bucket_i(lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + dpo3 = load_balance_get_bucket_i(lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; next1 = dpo1->dpoi_next_node; + next2 = dpo2->dpoi_next_node; + next3 = dpo3->dpoi_next_node; vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter (cm, cpu_index, lbi0, 1, @@ -168,6 +225,12 @@ mpls_lookup (vlib_main_t * vm, vlib_increment_combined_counter (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); + vlib_increment_combined_counter + (cm, cpu_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + vlib_increment_combined_counter + (cm, cpu_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); /* * before we pop the label copy th values we need to maintain. @@ -181,12 +244,20 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3]; vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1; vnet_buffer (b1)->mpls.first = 1; + vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3]; + vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1; + vnet_buffer (b2)->mpls.first = 1; + vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3]; + vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1; + vnet_buffer (b3)->mpls.first = 1; /* * pop the label that was just used in the lookup */ vlib_buffer_advance(b0, sizeof(*h0)); vlib_buffer_advance(b1, sizeof(*h1)); + vlib_buffer_advance(b2, sizeof(*h2)); + vlib_buffer_advance(b3, sizeof(*h3)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -210,9 +281,32 @@ mpls_lookup (vlib_main_t * vm, tr->label_net_byte_order = h1->label_exp_s_ttl; } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b2, sizeof (*tr)); + tr->next_index = next2; + tr->lb_index = lbi2; + tr->lfib_index = lfib_index2; + tr->hash = hash_c2; + tr->label_net_byte_order = h2->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b3, sizeof (*tr)); + tr->next_index = next3; + tr->lb_index = lbi3; + tr->lfib_index = lfib_index3; + tr->hash = hash_c3; + tr->label_net_byte_order = h3->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) @@ -361,10 +455,9 @@ mpls_load_balance (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { - mpls_lookup_next_t next0, next1; const load_balance_t *lb0, *lb1; vlib_buffer_t * p0, *p1; - u32 pi0, lbi0, hc0, pi1, lbi1, hc1; + u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1; const mpls_unicast_header_t *mpls0, *mpls1; const dpo_id_t *dpo0, *dpo1; @@ -465,10 +558,9 @@ mpls_load_balance (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { - mpls_lookup_next_t next0; const load_balance_t *lb0; vlib_buffer_t * p0; - u32 pi0, lbi0, hc0; + u32 pi0, lbi0, hc0, next0; const mpls_unicast_header_t *mpls0; const dpo_id_t *dpo0; -- cgit 1.2.3-korg