diff options
author | Yulong Pei <yulong.pei@intel.com> | 2018-10-19 23:29:29 +0800 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2018-11-06 12:05:40 +0000 |
commit | dcb0c1aae89a8b542f734b89e33505852ab3d6da (patch) | |
tree | 8b9ab1cd8e9d3ea79d4f99b3a4889bbef97d0c65 | |
parent | f286c4b9427748568036d04e8aa2408a4069dee7 (diff) |
Change l2_patch from dual-loop to quad-loop
The change saves 1.1 clocks per packet on the Intel Atom C3858 platform;
the cost decreased from 20.5 to 19.4 clocks per packet.
The change saves 0.3 clocks per packet on an Intel Xeon CPU E5-2699 v4 @ 2.20GHz;
the cost decreased from 12.6 to 12.3 clocks per packet.
Change-Id: I1ede77fb592a797d86940a8abad9ca291a89f1c7
Signed-off-by: Yulong Pei <yulong.pei@intel.com>
-rw-r--r-- | src/vnet/l2/l2_patch.c | 72 |
1 files changed, 54 insertions, 18 deletions
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c index ff3d2f3a6af..83e14a79ebb 100644 --- a/src/vnet/l2/l2_patch.c +++ b/src/vnet/l2/l2_patch.c @@ -98,51 +98,70 @@ l2_patch_node_fn (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) + while (n_left_from >= 8 && n_left_to_next >= 4) { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; /* Prefetch next iteration. */ { - vlib_buffer_t *p2, *p3; + vlib_buffer_t *p4, *p5, *p6, *p7; - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - /* So stupid / simple, we don't need to prefetch data */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); } /* speculatively enqueue b0 and b1 to the current next frame */ to_next[0] = bi0 = from[0]; to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX]; ASSERT 
(l2pm->tx_next_by_rx_sw_if_index[sw_if_index0] != ~0); ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0] != ~0); ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index1] != ~0); ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1] != ~0); + ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index2] != ~0); + ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2] != ~0); + ASSERT (l2pm->tx_next_by_rx_sw_if_index[sw_if_index3] != ~0); + ASSERT (l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3] != ~0); next0 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index0]; next1 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index1]; + next2 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index2]; + next3 = l2pm->tx_next_by_rx_sw_if_index[sw_if_index3]; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index0]; vnet_buffer (b1)->sw_if_index[VLIB_TX] = l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1]; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2]; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3]; if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) { @@ -162,12 +181,29 @@ l2_patch_node_fn (vlib_main_t * vm, t->tx_sw_if_index = l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index1]; } + if (b2->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->rx_sw_if_index = sw_if_index2; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index2]; + } + if (b3->flags & VLIB_BUFFER_IS_TRACED) + { + l2_patch_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->rx_sw_if_index = sw_if_index3; + t->tx_sw_if_index = + l2pm->tx_sw_if_index_by_rx_sw_if_index[sw_if_index3]; + } } /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, 
n_left_to_next, - bi0, bi1, next0, next1); + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) |