summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorYulong Pei <yulong.pei@intel.com>2018-10-19 23:29:29 +0800
committerDamjan Marion <dmarion@me.com>2018-11-06 12:05:40 +0000
commitdcb0c1aae89a8b542f734b89e33505852ab3d6da (patch)
tree8b9ab1cd8e9d3ea79d4f99b3a4889bbef97d0c65 /src
parentf286c4b9427748568036d04e8aa2408a4069dee7 (diff)
Change l2_patch from dual-loop to quad-loop
The change saves 1.1 clocks per packet on the Intel Atom C3858 platform, decreasing from 2.05e1 to 1.94e1 clocks per packet. The change saves 0.3 clocks per packet on an Intel Xeon CPU E5-2699 v4 @ 2.20GHz, decreasing from 1.26e1 to 1.23e1 clocks per packet. Change-Id: I1ede77fb592a797d86940a8abad9ca291a89f1c7 Signed-off-by: Yulong Pei <yulong.pei@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/vnet/l2/l2_patch.c72
1 file changed, 54 insertions(+), 18 deletions(-)
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
index ff3d2f3a6af..83e14a79ebb 100644
--- a/src/vnet/l2/l2_patch.c
+++ b/src/vnet/l2/l2_patch.c
@@ -98,51 +98,70 @@ l2_patch_node_fn (vlib_main_t * vm,
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
+ while (n_left_from >= 8 && n_left_to_next >= 4)
{
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
- u32 next0, next1;
- u32 sw_if_index0, sw_if_index1;
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
/* Prefetch next iteration. */
{
- vlib_buffer_t *p2, *p3;
+ vlib_buffer_t *p4, *p5, *p6, *p7;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- /* So stupid / simple, we don't need to prefetch data */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
}
/* speculatively enqueue b0 and b1 to the current next frame */
to_next[0] = bi0 = from[0];
to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0);
ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0);
ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0);
ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0);
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index2] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2] != ~0);
+ ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index3] != ~0);
+ ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3] != ~0);
next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0];
next1 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index1];
+ next2 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index2];
+ next3 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index3];
+
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0];
vnet_buffer (b1)->sw_if_index[VLIB_TX] =
l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
+ vnet_buffer (b2)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2];
+ vnet_buffer (b3)->sw_if_index[VLIB_TX] =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3];
if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
{
@@ -162,12 +181,29 @@ l2_patch_node_fn (vlib_main_t * vm,
t->tx_sw_if_index =
l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1];
}
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b2, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index2;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2];
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ l2_patch_trace_t *t =
+ vlib_add_trace (vm, node, b3, sizeof (*t));
+ t->rx_sw_if_index = sw_if_index3;
+ t->tx_sw_if_index =
+ l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3];
+ }
}
/* verify speculative enqueues, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
- bi0, bi1, next0, next1);
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
}
while (n_left_from > 0 && n_left_to_next > 0)