summaryrefslogtreecommitdiffstats
path: root/src/vnet/mpls/mpls_lookup.c
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2017-03-16 07:34:55 -0400
committerFlorin Coras <florin.coras@gmail.com>2017-03-17 16:08:56 +0000
commit696e88da9799056036f329676213f3c0c0a1db9c (patch)
tree750943198b82e1c57e8a0b00fafd91848a8b3e91 /src/vnet/mpls/mpls_lookup.c
parent4b919a56642ccd0a44920feace872aeb5b7a62cf (diff)
MPLS performance improvments.
1 - Quad loop lookup and label imposition. 2 - optimise imposition for the 1 label case 3 - input gets TTL from header directly (no byte swap) Change-Id: I59204c9e5d134b0df75d7afa43e360f946d1ffe7 Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'src/vnet/mpls/mpls_lookup.c')
-rw-r--r--src/vnet/mpls/mpls_lookup.c116
1 files changed, 104 insertions, 12 deletions
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 2d34cbde341..475bb20454b 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -80,7 +80,7 @@ mpls_lookup (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
+ while (n_left_from >= 8 && n_left_to_next >= 4)
{
u32 lbi0, next0, lfib_index0, bi0, hash_c0;
const mpls_unicast_header_t * h0;
@@ -92,46 +92,79 @@ mpls_lookup (vlib_main_t * vm,
const load_balance_t *lb1;
const dpo_id_t *dpo1;
vlib_buffer_t * b1;
+ u32 lbi2, next2, lfib_index2, bi2, hash_c2;
+ const mpls_unicast_header_t * h2;
+ const load_balance_t *lb2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 lbi3, next3, lfib_index3, bi3, hash_c3;
+ const mpls_unicast_header_t * h3;
+ const load_balance_t *lb3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3;
+ vlib_buffer_t * p2, * p3, *p4, *p5;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
vlib_prefetch_buffer_header (p2, STORE);
vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE);
}
bi0 = to_next[0] = from[0];
bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
+ from += 4;
+ n_left_from -= 4;
+ to_next += 4;
+ n_left_to_next -= 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
h0 = vlib_buffer_get_current (b0);
h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
vnet_buffer(b0)->sw_if_index[VLIB_RX]);
lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b2)->sw_if_index[VLIB_RX]);
+ lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b3)->sw_if_index[VLIB_RX]);
lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+ lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
+ lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
+
lb0 = load_balance_get(lbi0);
lb1 = load_balance_get(lbi1);
+ lb2 = load_balance_get(lbi2);
+ lb3 = load_balance_get(lbi3);
hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
{
@@ -143,11 +176,25 @@ mpls_lookup (vlib_main_t * vm,
hash_c1 = vnet_buffer (b1)->ip.flow_hash =
mpls_compute_flow_hash(h1, lb1->lb_hash_config);
}
+ if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+ {
+ hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+ mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+ }
+ if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+ {
+ hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+ mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+ }
ASSERT (lb0->lb_n_buckets > 0);
ASSERT (is_pow2 (lb0->lb_n_buckets));
ASSERT (lb1->lb_n_buckets > 0);
ASSERT (is_pow2 (lb1->lb_n_buckets));
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
dpo0 = load_balance_get_bucket_i(lb0,
(hash_c0 &
@@ -155,12 +202,22 @@ mpls_lookup (vlib_main_t * vm,
dpo1 = load_balance_get_bucket_i(lb1,
(hash_c1 &
(lb1->lb_n_buckets_minus_1)));
+ dpo2 = load_balance_get_bucket_i(lb2,
+ (hash_c2 &
+ (lb2->lb_n_buckets_minus_1)));
+ dpo3 = load_balance_get_bucket_i(lb3,
+ (hash_c3 &
+ (lb3->lb_n_buckets_minus_1)));
next0 = dpo0->dpoi_next_node;
next1 = dpo1->dpoi_next_node;
+ next2 = dpo2->dpoi_next_node;
+ next3 = dpo3->dpoi_next_node;
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
vlib_increment_combined_counter
(cm, cpu_index, lbi0, 1,
@@ -168,6 +225,12 @@ mpls_lookup (vlib_main_t * vm,
vlib_increment_combined_counter
(cm, cpu_index, lbi1, 1,
vlib_buffer_length_in_chain (vm, b1));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi2, 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi3, 1,
+ vlib_buffer_length_in_chain (vm, b3));
/*
* before we pop the label copy th values we need to maintain.
@@ -181,12 +244,20 @@ mpls_lookup (vlib_main_t * vm,
vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3];
vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1;
vnet_buffer (b1)->mpls.first = 1;
+ vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3];
+ vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1;
+ vnet_buffer (b2)->mpls.first = 1;
+ vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3];
+ vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1;
+ vnet_buffer (b3)->mpls.first = 1;
/*
* pop the label that was just used in the lookup
*/
vlib_buffer_advance(b0, sizeof(*h0));
vlib_buffer_advance(b1, sizeof(*h1));
+ vlib_buffer_advance(b2, sizeof(*h2));
+ vlib_buffer_advance(b3, sizeof(*h3));
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -210,9 +281,32 @@ mpls_lookup (vlib_main_t * vm,
tr->label_net_byte_order = h1->label_exp_s_ttl;
}
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->next_index = next2;
+ tr->lb_index = lbi2;
+ tr->lfib_index = lfib_index2;
+ tr->hash = hash_c2;
+ tr->label_net_byte_order = h2->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->next_index = next3;
+ tr->lb_index = lbi3;
+ tr->lfib_index = lfib_index3;
+ tr->hash = hash_c3;
+ tr->label_net_byte_order = h3->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
- bi0, bi1, next0, next1);
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -361,10 +455,9 @@ mpls_load_balance (vlib_main_t * vm,
while (n_left_from >= 4 && n_left_to_next >= 2)
{
- mpls_lookup_next_t next0, next1;
const load_balance_t *lb0, *lb1;
vlib_buffer_t * p0, *p1;
- u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1;
const mpls_unicast_header_t *mpls0, *mpls1;
const dpo_id_t *dpo0, *dpo1;
@@ -465,10 +558,9 @@ mpls_load_balance (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next > 0)
{
- mpls_lookup_next_t next0;
const load_balance_t *lb0;
vlib_buffer_t * p0;
- u32 pi0, lbi0, hc0;
+ u32 pi0, lbi0, hc0, next0;
const mpls_unicast_header_t *mpls0;
const dpo_id_t *dpo0;