aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichal Mazur <mkm@semihalf.com>2017-09-11 16:20:21 +0200
committerMichal Mazur <mkm@semihalf.com>2017-12-14 15:18:05 +0100
commit0c552959dc425c5f00499d6a587d16275a6399db (patch)
tree05fddc1ee2c881570bb80b2a365027297113d62e
parentd830f80364a33f81c4b534eca902ac7600cbfc05 (diff)
Optimize Rx and Tx paths
1) Handle multiple ODP packets at once in the receive loop
2) Wait to collect as many RX buffers as possible in a single vector
3) Add prefetch of received and transmitted buffers
4) Disable parser, classifier and synchronization of RX queues

Synchronization of Tx queues can also be disabled if 2 ports are used.

Change-Id: I65ed49ef2b60278022712e10a83f6ca24360694e
Signed-off-by: Michal Mazur <mkm@semihalf.com>
-rwxr-xr-xsrc/plugins/odp/device.c10
-rwxr-xr-xsrc/plugins/odp/node.c217
-rwxr-xr-xsrc/plugins/odp/odp_packet.c15
3 files changed, 199 insertions, 43 deletions
diff --git a/src/plugins/odp/device.c b/src/plugins/odp/device.c
index e904f65..3e11106 100755
--- a/src/plugins/odp/device.c
+++ b/src/plugins/odp/device.c
@@ -31,6 +31,16 @@ static char *odp_packet_tx_func_error_strings[] = {
#undef _
};
+/*
+ * Issue LOAD prefetches for the buffer with index bi: one for the ODP
+ * packet obtained from the vlib buffer, one for the vlib buffer itself.
+ * Each prefetch covers CLIB_CACHE_LINE_BYTES bytes.
+ */
+static_always_inline void
+odp_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi)
+{
+  vlib_buffer_t *buf = vlib_get_buffer (vm, bi);
+  odp_packet_t odp_pkt = odp_packet_from_vlib_buffer (buf);
+
+  CLIB_PREFETCH (odp_pkt, CLIB_CACHE_LINE_BYTES, LOAD);
+  CLIB_PREFETCH (buf, CLIB_CACHE_LINE_BYTES, LOAD);
+}
static u8 *
format_odp_packet_device_name (u8 * s, va_list * args)
diff --git a/src/plugins/odp/node.c b/src/plugins/odp/node.c
index 7e13095..a04ee47 100755
--- a/src/plugins/odp/node.c
+++ b/src/plugins/odp/node.c
@@ -48,7 +48,24 @@ format_odp_packet_input_trace (u8 * s, va_list * args)
return s;
}
-int
+/*
+ * Prefetch an ODP packet (LOAD) together with the vlib buffer stored in
+ * its user area (STORE). Each prefetch covers CLIB_CACHE_LINE_BYTES bytes.
+ */
+static_always_inline void
+odp_prefetch_buffer (odp_packet_t pkt)
+{
+  vlib_buffer_t *buf;
+
+  buf = (vlib_buffer_t *) odp_packet_user_area (pkt);
+  CLIB_PREFETCH (pkt, CLIB_CACHE_LINE_BYTES, LOAD);
+  CLIB_PREFETCH (buf, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+/*
+ * Prefetch (LOAD) the packet data starting at the offset of the 'type'
+ * field of an ethernet_header_t, measured from the current data pointer
+ * of the vlib buffer stored in the packet's user area.
+ */
+static_always_inline void
+odp_prefetch_ethertype (odp_packet_t pkt)
+{
+  vlib_buffer_t *buf = (vlib_buffer_t *) odp_packet_user_area (pkt);
+  void *etype_addr = vlib_buffer_get_current (buf) +
+    STRUCT_OFFSET_OF (ethernet_header_t, type);
+
+  CLIB_PREFETCH (etype_addr, CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+always_inline int
odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
{
u32 num_evts = 0, num_pkts = 0;
@@ -60,7 +77,7 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
if (pktio == ODP_PKTIO_INVALID)
{
clib_warning ("odp_pktio_lookup() failed");
- return -1;
+ return 0;
}
inq = ODP_QUEUE_INVALID;
@@ -68,7 +85,7 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
(odp_pktin_event_queue (pktio, &inq, 1) != 1))
{
clib_warning ("Error:no input queue");
- return -1;
+ return 0;
}
while (num_evts < VLIB_FRAME_SIZE)
@@ -96,9 +113,9 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[])
return num_pkts;
}
-int
+always_inline int
odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin,
- odp_packet_t pkt_tbl[])
+ odp_packet_t pkt_tbl[], u32 req_pkts)
{
u32 num_pkts = 0;
int ret;
@@ -106,13 +123,13 @@ odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin,
if (odp_pktin_queue (pktio, &pktin, 1) != 1)
{
clib_warning ("odp_pktio_open() failed: no pktin queue");
- return -1;
+ return 0;
}
- while (num_pkts < VLIB_FRAME_SIZE)
+ while (num_pkts < req_pkts)
{
ret = odp_pktin_recv (pktin, &pkt_tbl[num_pkts],
- VLIB_FRAME_SIZE - num_pkts);
+ req_pkts - num_pkts);
if (ret <= 0)
break;
num_pkts += ret;
@@ -155,6 +172,43 @@ odp_rx_next_from_etype (void *mb, vlib_buffer_t * b0)
return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
}
+/*
+ * Initialise the vlib buffer metadata for a packet received on oif.
+ *
+ * The RX interface index is taken from oif and the TX index is set to ~0.
+ * The flags are overwritten with VLIB_BUFFER_TOTAL_LENGTH_VALID only, the
+ * length outside the first buffer is zeroed, the data offset is reset to 0
+ * and the current length is read from the ODP packet.
+ */
+static_always_inline void
+odp_adjust_buffer (vlib_buffer_t * buf, odp_packet_t pkt,
+		   odp_packet_if_t * oif)
+{
+  /* interface bookkeeping */
+  vnet_buffer (buf)->sw_if_index[VLIB_RX] = oif->sw_if_index;
+  vnet_buffer (buf)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+  /* length and flags */
+  buf->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  buf->total_length_not_including_first_buffer = 0;
+  buf->current_data = 0;
+  buf->current_length = odp_packet_len (pkt);
+}
+
+/*
+ * Run the trace-trajectory init hook for b0 and, while n_trace is still
+ * positive, trace the buffer: decrement n_trace, store the new trace count
+ * on the node and add an odp_packet_input_trace_t record holding next0 and
+ * the interface's hw_if_index.
+ *
+ * n_trace must be a modifiable lvalue (it is pre-decremented in place).
+ *
+ * The body is wrapped in do { } while (0) so that the macro expands to
+ * exactly one statement; without it, using the macro under an unbraced
+ * if/for would guard only the first statement, and a following 'else'
+ * would fail to parse.
+ */
+#define ODP_TRACE_BUFFER(n_trace, b0, next0, vm, node, oif) \
+  do \
+    { \
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); \
+      if (PREDICT_FALSE ((n_trace) > 0)) \
+	{ \
+	  odp_packet_input_trace_t *tr; \
+	  vlib_trace_buffer (vm, node, next0, b0, 0); \
+	  vlib_set_trace_count (vm, node, --(n_trace)); \
+	  tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); \
+	  tr->next_index = next0; \
+	  tr->hw_if_index = (oif)->hw_if_index; \
+	} \
+    } \
+  while (0)
+
+/*
+ * Apply ODP_TRACE_BUFFER to four buffers in order (b0 first, b3 last),
+ * each with its own next index. *n_trace is decremented in place for every
+ * buffer that actually gets traced.
+ */
+void
+odp_trace_buffer_x4 (uword * n_trace, vlib_main_t * vm,
+		     vlib_node_runtime_t * node, odp_packet_if_t * oif,
+		     vlib_buffer_t * b0, vlib_buffer_t * b1,
+		     vlib_buffer_t * b2, vlib_buffer_t * b3, u32 next0,
+		     u32 next1, u32 next2, u32 next3)
+{
+  vlib_buffer_t *bufs[4] = { b0, b1, b2, b3 };
+  u32 nexts[4] = { next0, next1, next2, next3 };
+  int k;
+
+  for (k = 0; k < 4; k++)
+    {
+      /* braces are required: the macro may expand to several statements */
+      ODP_TRACE_BUFFER (*n_trace, bufs[k], nexts[k], vm, node, oif);
+    }
+}
+
always_inline uword
odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, odp_packet_if_t * oif)
@@ -166,26 +220,107 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 *to_next = 0;
odp_pktin_queue_t pktin = { 0 };
odp_packet_t pkt_tbl[VLIB_FRAME_SIZE];
- u32 pkts = 0, pkts_ok = 0;
-
- if ((oif->mode == (APPL_MODE_PKT_QUEUE)) ||
- (oif->mode == (APPL_MODE_PKT_SCHED)))
+ int pkts = 0, i;
+ u32 retry = 8;
+ u32 n_left = 0, n_left_to_next = VLIB_FRAME_SIZE;
+ u32 next0 = next_index;
+ u32 next1 = next_index;
+ u32 next2 = next_index;
+ u32 next3 = next_index;
+
+ while (1)
{
- pkts = odp_packet_queue_mode (oif->pktio, oif->mode, pkt_tbl);
- }
- else
- {
- pkts = odp_packet_burst_mode (oif->pktio, pktin, pkt_tbl);
- }
+ if ((oif->mode == (APPL_MODE_PKT_QUEUE)) ||
+ (oif->mode == (APPL_MODE_PKT_SCHED)))
+ {
+ pkts = odp_packet_queue_mode (oif->pktio, oif->mode, pkt_tbl);
+ }
+ else
+ {
+ pkts = odp_packet_burst_mode (oif->pktio, pktin, pkt_tbl,
+ n_left_to_next);
+ }
- if (pkts > 0)
- {
- u32 n_left_to_next, i = 0;
- u32 next0 = next_index;
- pkts_ok = drop_err_pkts (pkt_tbl, pkts);
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ n_left = drop_err_pkts (pkt_tbl, pkts);
+ if (n_left == 0)
+ {
+ if (retry--)
+ continue;
+ else
+ break;
+ }
+ i = 0;
+
+ if (n_rx_packets == 0)
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while ((n_left >= 4) && (n_left_to_next >= 4))
+ {
+ u32 bi0 = 0, bi1 = 0, bi2 = 0, bi3 = 0;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
- while ((i < pkts_ok) && (n_left_to_next))
+ b0 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i]);
+ b1 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 1]);
+ b2 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 2]);
+ b3 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i + 3]);
+ bi0 = vlib_get_buffer_index (vm, b0);
+ bi1 = vlib_get_buffer_index (vm, b1);
+ bi2 = vlib_get_buffer_index (vm, b2);
+ bi3 = vlib_get_buffer_index (vm, b3);
+
+ odp_adjust_buffer (b0, pkt_tbl[i], oif);
+ odp_adjust_buffer (b1, pkt_tbl[i + 1], oif);
+ odp_adjust_buffer (b2, pkt_tbl[i + 2], oif);
+ odp_adjust_buffer (b3, pkt_tbl[i + 3], oif);
+
+ if (PREDICT_FALSE (oif->per_interface_next_index != ~0))
+ {
+ next0 = oif->per_interface_next_index;
+ next1 = oif->per_interface_next_index;
+ next2 = oif->per_interface_next_index;
+ next3 = oif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = odp_rx_next_from_etype (pkt_tbl[i], b0);
+ next1 = odp_rx_next_from_etype (pkt_tbl[i + 1], b1);
+ next2 = odp_rx_next_from_etype (pkt_tbl[i + 2], b2);
+ next3 = odp_rx_next_from_etype (pkt_tbl[i + 3], b3);
+ }
+
+ vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
+ vlib_buffer_advance (b1, device_input_next_node_advance[next1]);
+ vlib_buffer_advance (b2, device_input_next_node_advance[next2]);
+ vlib_buffer_advance (b3, device_input_next_node_advance[next3]);
+
+ /* trace */
+ if (PREDICT_FALSE ((n_trace) > 0))
+ odp_trace_buffer_x4 (&n_trace, vm, node, oif, b0, b1, b2, b3,
+ next0, next1, next2, next3);
+
+ n_left_to_next -= 4;
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ to_next[2] = bi2;
+ to_next[3] = bi3;
+ to_next += 4;
+
+ /* enqueue and take next packet */
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+
+ /* next packet */
+ n_rx_bytes += b0->current_length;
+ n_rx_bytes += b1->current_length;
+ n_rx_bytes += b2->current_length;
+ n_rx_bytes += b3->current_length;
+ i += 4;
+ n_left -= 4;
+ n_rx_packets += 4;
+ }
+
+ while ((n_left > 0) && (n_left_to_next > 0))
{
u32 bi0 = 0;
vlib_buffer_t *b0;
@@ -209,16 +344,7 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_advance (b0, device_input_next_node_advance[next0]);
/* trace */
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
- if (PREDICT_FALSE (n_trace > 0))
- {
- odp_packet_input_trace_t *tr;
- vlib_trace_buffer (vm, node, next0, b0, 0);
- vlib_set_trace_count (vm, node, --n_trace);
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = oif->hw_if_index;
- }
+ ODP_TRACE_BUFFER (n_trace, b0, next0, vm, node, oif);
n_left_to_next--;
to_next[0] = bi0;
@@ -229,21 +355,28 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_to_next, bi0, next0);
/* next packet */
- n_rx_packets++;
n_rx_bytes += odp_packet_len (pkt_tbl[i]);
i++;
+ n_left--;
+ n_rx_packets++;
}
+ if (n_left_to_next < 4)
+ break;
+ }
+
+ if (n_rx_packets)
+ {
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_increment_combined_counter (vnet_get_main ()->
+ interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (),
+ oif->hw_if_index, n_rx_packets,
+ n_rx_bytes);
}
- vlib_increment_combined_counter (vnet_get_main ()->
- interface_main.combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- vlib_get_thread_index (), oif->hw_if_index,
- n_rx_packets, n_rx_bytes);
-
return n_rx_packets;
}
diff --git a/src/plugins/odp/odp_packet.c b/src/plugins/odp/odp_packet.c
index 7fb7d4b..70f7452 100755
--- a/src/plugins/odp/odp_packet.c
+++ b/src/plugins/odp/odp_packet.c
@@ -67,6 +67,8 @@ create_pktio (const char *dev, odp_pool_t pool, u32 mode)
int ret;
odp_pktio_param_t pktio_param;
odp_pktin_queue_param_t pktin_param;
+ odp_pktout_queue_param_t pktout_param;
+ odp_pktio_config_t pktio_config;
odp_pktio_param_init (&pktio_param);
@@ -93,7 +95,13 @@ create_pktio (const char *dev, odp_pool_t pool, u32 mode)
clib_warning ("Error: pktio create failed for %s", dev);
}
+ odp_pktio_config_init (&pktio_config);
+ pktio_config.parser.layer = ODP_PKTIO_PARSER_LAYER_NONE;
+ odp_pktio_config (pktio, &pktio_config);
+
odp_pktin_queue_param_init (&pktin_param);
+ pktin_param.classifier_enable = 0;
+ pktin_param.op_mode = ODP_PKTIO_OP_MT_UNSAFE;
if (mode == APPL_MODE_PKT_SCHED)
pktin_param.queue_param.sched.sync = ODP_SCHED_SYNC_ATOMIC;
@@ -103,7 +111,12 @@ create_pktio (const char *dev, odp_pool_t pool, u32 mode)
clib_warning ("Error: pktin config failed");
}
- if (odp_pktout_queue_config (pktio, NULL))
+ odp_pktout_queue_param_init (&pktout_param);
+ /* TODO use multiple output queue and no synchronization
+ pktout_param.op_mode = ODP_PKTIO_OP_MT_UNSAFE;
+ */
+
+ if (odp_pktout_queue_config (pktio, &pktout_param))
{
clib_warning ("Error: pktout config failed");
}