about summary refs log tree commit diff stats
path: root/src/plugins/odp/node.c
diff options
context:
space:
mode:
authorMichal Mazur <mkm@semihalf.com>2017-09-11 16:20:21 +0200
committerMichal Mazur <mkm@semihalf.com>2017-12-14 15:18:05 +0100
commit0c552959dc425c5f00499d6a587d16275a6399db (patch)
tree05fddc1ee2c881570bb80b2a365027297113d62e /src/plugins/odp/node.c
parentd830f80364a33f81c4b534eca902ac7600cbfc05 (diff)
Optimize Rx and Tx paths
1) Handle multiple ODP packets at once in receive loop
2) Wait to collect as many RX buffers as possible in a single vector
3) Add prefetch of received and transmitted buffers
4) Disable parser, classifier and synchronization of RX queues

Synchronization of Tx queues can also be disabled if 2 ports are used.

Change-Id: I65ed49ef2b60278022712e10a83f6ca24360694e
Signed-off-by: Michal Mazur <mkm@semihalf.com>
Diffstat (limited to 'src/plugins/odp/node.c')
-rwxr-xr-x  src/plugins/odp/node.c  |  217
1 file changed, 175 insertions, 42 deletions
diff --git a/src/plugins/odp/node.c b/src/plugins/odp/node.c
index 7e13095d..a04ee47f 100755
--- a/src/plugins/odp/node.c
+++ b/src/plugins/odp/node.c
@@ -48,7 +48,24 @@ format_odp_packet_input_trace (u8 * s, va_list * args)
return s;
}
-int
+static_always_inline void
+odp_prefetch_buffer (odp_packet_t pkt)
+{
+ vlib_buffer_t *b = (vlib_buffer_t *) odp_packet_user_area (pkt);
+ CLIB_PREFETCH (pkt, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+static_always_inline void
+odp_prefetch_ethertype (odp_packet_t pkt)
+{
+ vlib_buffer_t *b = (vlib_buffer_t *) odp_packet_user_area (pkt);
+ CLIB_PREFETCH (vlib_buffer_get_current (b) +
+ STRUCT_OFFSET_OF (ethernet_header_t, type),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+always_inline int
odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
{
u32 num_evts = 0, num_pkts = 0;
@@ -60,7 +77,7 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
if (pktio == ODP_PKTIO_INVALID)
{
clib_warning ("odp_pktio_lookup() failed");
- return -1;
+ return 0;
}
inq = ODP_QUEUE_INVALID;
@@ -68,7 +85,7 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
(odp_pktin_event_queue (pktio, &inq, 1) != 1))
{
clib_warning ("Error:no input queue");
- return -1;
+ return 0;
}
while (num_evts < VLIB_FRAME_SIZE)
@@ -96,9 +113,9 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
return num_pkts;
}
-int
+always_inline int
odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin,
- odp_packet_t pkt_tbl[])
+ odp_packet_t pkt_tbl[], u32 req_pkts)
{
u32 num_pkts = 0;
int ret;
@@ -106,13 +123,13 @@ odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin,
if (odp_pktin_queue (pktio, &pktin, 1) != 1)
{
clib_warning ("odp_pktio_open() failed: no pktin queue");
- return -1;
+ return 0;
}
- while (num_pkts < VLIB_FRAME_SIZE)
+ while (num_pkts < req_pkts)
{
ret = odp_pktin_recv (pktin, &pkt_tbl[num_pkts],
- VLIB_FRAME_SIZE - num_pkts);
+ req_pkts - num_pkts);
if (ret <= 0)
break;
num_pkts += ret;
@@ -155,6 +172,43 @@ odp_rx_next_from_etype (void *mb, vlib_buffer_t * b0)
return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
}
+static_always_inline void
+odp_adjust_buffer (vlib_buffer_t * buf, odp_packet_t pkt,
+ odp_packet_if_t * oif)
+{
+ buf->current_length = odp_packet_len (pkt);
+ buf->current_data = 0;
+ buf->total_length_not_including_first_buffer = 0;
+ buf->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (buf)->sw_if_index[VLIB_RX] = oif->sw_if_index;
+ vnet_buffer (buf)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+}
+
+#define ODP_TRACE_BUFFER(n_trace, b0, next0, vm, node, oif) \
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); \
+ if (PREDICT_FALSE ((n_trace) > 0)) \
+ { \
+ odp_packet_input_trace_t *tr; \
+ vlib_trace_buffer (vm, node, next0, b0, 0); \
+ vlib_set_trace_count (vm, node, --(n_trace)); \
+ tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); \
+ tr->next_index = next0; \
+ tr->hw_if_index = (oif)->hw_if_index; \
+ }
+
+void
+odp_trace_buffer_x4 (uword * n_trace, vlib_main_t * vm,
+ vlib_node_runtime_t * node, odp_packet_if_t * oif,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ vlib_buffer_t * b2, vlib_buffer_t * b3, u32 next0,
+ u32 next1, u32 next2, u32 next3)
+{
+ ODP_TRACE_BUFFER (*n_trace, b0, next0, vm, node, oif);
+ ODP_TRACE_BUFFER (*n_trace, b1, next1, vm, node, oif);
+ ODP_TRACE_BUFFER (*n_trace, b2, next2, vm, node, oif);
+ ODP_TRACE_BUFFER (*n_trace, b3, next3, vm, node, oif);
+}
+
always_inline uword
odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, odp_packet_if_t * oif)
@@ -166,26 +220,107 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 *to_next = 0;
odp_pktin_queue_t pktin = { 0 };
odp_packet_t pkt_tbl[VLIB_FRAME_SIZE];
- u32 pkts = 0, pkts_ok = 0;
-
- if ((oif->mode == (APPL_MODE_PKT_QUEUE)) ||
- (oif->mode == (APPL_MODE_PKT_SCHED)))
+ int pkts = 0, i;
+ u32 retry = 8;
+ u32 n_left = 0, n_left_to_next = VLIB_FRAME_SIZE;
+ u32 next0 = next_index;
+ u32 next1 = next_index;
+ u32 next2 = next_index;
+ u32 next3 = next_index;
+
+ while (1)
{
- pkts = odp_packet_queue_mode (oif->pktio, oif->mode, pkt_tbl);
- }
- else
- {
- pkts = odp_packet_burst_mode (oif->pktio, pktin, pkt_tbl);
- }
+ if ((oif->mode == (APPL_MODE_PKT_QUEUE)) ||
+ (oif->mode == (APPL_MODE_PKT_SCHED)))
+ {
+ pkts = odp_packet_queue_mode (oif->pktio, oif->mode, pkt_tbl);
+ }
+ else
+ {
+ pkts = odp_packet_burst_mode (oif->pktio, pktin, pkt_tbl,
+ n_left_to_next);
+ }
- if (pkts > 0)
- {
- u32 n_left_to_next, i = 0;
- u32 next0 = next_index;
- pkts_ok = drop_err_pkts (pkt_tbl, pkts);
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ n_left = drop_err_pkts (pkt_tbl, pkts);
+ if (n_left == 0)
+ {
+ if (retry--)
+ continue;
+ else
+ break;
+ }
+ i = 0;
+
+ if (n_rx_packets == 0)
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while ((n_left >= 4) && (n_left_to_next >= 4))
+ {
+ u32 bi0 = 0, bi1 = 0, bi2 = 0, bi3 = 0;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
- while ((i < pkts_ok) && (n_left_to_next))
+ b0 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i]);
+ b1 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 1]);
+ b2 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 2]);
+ b3 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 3]);
+ bi0 = vlib_get_buffer_index (vm, b0);
+ bi1 = vlib_get_buffer_index (vm, b1);
+ bi2 = vlib_get_buffer_index (vm, b2);
+ bi3 = vlib_get_buffer_index (vm, b3);
+
+ odp_adjust_buffer (b0, pkt_tbl[i], oif);
+ odp_adjust_buffer (b1, pkt_tbl[i + 1], oif);
+ odp_adjust_buffer (b2, pkt_tbl[i + 2], oif);
+ odp_adjust_buffer (b3, pkt_tbl[i + 3], oif);
+
+ if (PREDICT_FALSE (oif->per_interface_next_index != ~0))
+ {
+ next0 = oif->per_interface_next_index;
+ next1 = oif->per_interface_next_index;
+ next2 = oif->per_interface_next_index;
+ next3 = oif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = odp_rx_next_from_etype (pkt_tbl[i], b0);
+ next1 = odp_rx_next_from_etype (pkt_tbl[i + 1], b1);
+ next2 = odp_rx_next_from_etype (pkt_tbl[i + 2], b2);
+ next3 = odp_rx_next_from_etype (pkt_tbl[i + 3], b3);
+ }
+
+ vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
+ vlib_buffer_advance (b1, device_input_next_node_advance[next1]);
+ vlib_buffer_advance (b2, device_input_next_node_advance[next2]);
+ vlib_buffer_advance (b3, device_input_next_node_advance[next3]);
+
+ /* trace */
+ if (PREDICT_FALSE ((n_trace) > 0))
+ odp_trace_buffer_x4 (&n_trace, vm, node, oif, b0, b1, b2, b3,
+ next0, next1, next2, next3);
+
+ n_left_to_next -= 4;
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ to_next[2] = bi2;
+ to_next[3] = bi3;
+ to_next += 4;
+
+ /* enque and take next packet */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+
+ /* next packet */
+ n_rx_bytes += b0->current_length;
+ n_rx_bytes += b1->current_length;
+ n_rx_bytes += b2->current_length;
+ n_rx_bytes += b3->current_length;
+ i += 4;
+ n_left -= 4;
+ n_rx_packets += 4;
+ }
+
+ while ((n_left > 0) && (n_left_to_next > 0))
{
u32 bi0 = 0;
vlib_buffer_t *b0;
@@ -209,16 +344,7 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
/* trace */
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
- if (PREDICT_FALSE (n_trace > 0))
- {
- odp_packet_input_trace_t *tr;
- vlib_trace_buffer (vm, node, next0, b0, 0);
- vlib_set_trace_count (vm, node, --n_trace);
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = oif->hw_if_index;
- }
+ ODP_TRACE_BUFFER (n_trace, b0, next0, vm, node, oif);
n_left_to_next--;
to_next[0] = bi0;
@@ -229,21 +355,28 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_to_next, bi0, next0);
/* next packet */
- n_rx_packets++;
n_rx_bytes += odp_packet_len (pkt_tbl[i]);
i++;
+ n_left--;
+ n_rx_packets++;
}
+ if (n_left_to_next < 4)
+ break;
+ }
+
+ if (n_rx_packets)
+ {
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_increment_combined_counter (vnet_get_main ()->
+ interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (),
+ oif->hw_if_index, n_rx_packets,
+ n_rx_bytes);
}
- vlib_increment_combined_counter (vnet_get_main ()->
- interface_main.combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- vlib_get_thread_index (), oif->hw_if_index,
- n_rx_packets, n_rx_bytes);
-
return n_rx_packets;
}