aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-11-22 18:21:51 -0800
committerDamjan Marion <damarion@cisco.com>2016-11-23 17:22:58 +0100
commit754962130a63487d646a9dda947aefa515c0c0b4 (patch)
treea949f2b9dd1197dc1a311e16a81dd77d4d7bfe16
parent971158ee7bc1a07d0d1468774e5dc0e388b6c449 (diff)
dpdk: input node next and error optimizations
Change-Id: Iee90ed725e824a3beed2660d21d74dae48c76eb0 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--vnet/vnet/devices/dpdk/dpdk.h3
-rwxr-xr-xvnet/vnet/devices/dpdk/init.c5
-rw-r--r--vnet/vnet/devices/dpdk/node.c235
3 files changed, 117 insertions, 126 deletions
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
index 7e5712fbce9..19c521bcf3a 100644
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ b/vnet/vnet/devices/dpdk/dpdk.h
@@ -507,9 +507,6 @@ void dpdk_rx_trace (dpdk_main_t * dm,
#define EFD_OPERATION_LESS_THAN 0
#define EFD_OPERATION_GREATER_OR_EQUAL 1
-void *dpdk_input_multiarch_select ();
-void *dpdk_input_rss_multiarch_select ();
-
format_function_t format_dpdk_device_name;
format_function_t format_dpdk_device;
format_function_t format_dpdk_tx_dma_trace;
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
index ad3180528f4..c9e7dc8e1de 100755
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -243,7 +243,6 @@ dpdk_lib_init (dpdk_main_t * dm)
clib_error_t *error;
vlib_main_t *vm = vlib_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_node_runtime_t *rt;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hi;
dpdk_device_t *xd;
@@ -259,9 +258,6 @@ dpdk_lib_init (dpdk_main_t * dm)
dm->input_cpu_first_index = 0;
dm->input_cpu_count = 1;
- rt = vlib_node_get_runtime (vm, dpdk_input_node.index);
- rt->function = dpdk_input_multiarch_select ();
-
/* find out which cpus will be used for input */
p = hash_get_mem (tm->thread_registrations_by_name, "workers");
tr = p ? (vlib_thread_registration_t *) p[0] : 0;
@@ -410,7 +406,6 @@ dpdk_lib_init (dpdk_main_t * dm)
if (devconf->num_rx_queues > 1 && dm->use_rss == 0)
{
- rt->function = dpdk_input_rss_multiarch_select ();
dm->use_rss = 1;
}
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 417ac3966f3..758bcfb91ac 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -54,60 +54,38 @@ vlib_buffer_is_mpls (vlib_buffer_t * b)
}
always_inline void
-dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t * xd,
- struct rte_mbuf *mb,
- vlib_buffer_t * b0, u32 * next0,
- u8 * error0)
+dpdk_rx_next_from_mb (struct rte_mbuf *mb, vlib_buffer_t * b0, u32 * next0)
{
u32 n0;
- uint16_t mb_flags = mb->ol_flags;
- if (PREDICT_FALSE (mb_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)))
+ if (PREDICT_FALSE ((mb->ol_flags & PKT_RX_VLAN_PKT)))
+ n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
+ n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
+ n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
+ n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
+ else
+ n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ *next0 = n0;
+}
+
+always_inline void
+dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error)
+{
+ if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD)
{
- /* some error was flagged. determine the drop reason */
- n0 = VNET_DEVICE_INPUT_NEXT_DROP;
- *error0 =
- (mb_flags & PKT_RX_IP_CKSUM_BAD) ? DPDK_ERROR_IP_CHECKSUM_ERROR :
- (mb_flags & PKT_RX_L4_CKSUM_BAD) ? DPDK_ERROR_L4_CHECKSUM_ERROR :
- DPDK_ERROR_NONE;
+ *error = DPDK_ERROR_IP_CHECKSUM_ERROR;
+ *next = VNET_DEVICE_INPUT_NEXT_DROP;
}
- else
+ else if (mb->ol_flags & PKT_RX_L4_CKSUM_BAD)
{
- *error0 = DPDK_ERROR_NONE;
- if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
- {
- n0 = xd->per_interface_next_index;
- b0->flags |= BUFFER_HANDOFF_NEXT_VALID;
- if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
- vnet_buffer (b0)->handoff.next_index =
- HANDOFF_DISPATCH_NEXT_IP4_INPUT;
- else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
- vnet_buffer (b0)->handoff.next_index =
- HANDOFF_DISPATCH_NEXT_IP6_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
- vnet_buffer (b0)->handoff.next_index =
- HANDOFF_DISPATCH_NEXT_MPLS_INPUT;
- else
- vnet_buffer (b0)->handoff.next_index =
- HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
- }
- else
- if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF) ||
- (mb_flags & PKT_RX_VLAN_PKT)))
- n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- else
- {
- if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
- n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
- n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
- n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
- else
- n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- }
+ *error = DPDK_ERROR_L4_CHECKSUM_ERROR;
+ *next = VNET_DEVICE_INPUT_NEXT_DROP;
}
- *next0 = n0;
+ else
+ *error = DPDK_ERROR_NONE;
}
void
@@ -136,7 +114,15 @@ dpdk_rx_trace (dpdk_main_t * dm,
b0 = vlib_get_buffer (vm, bi0);
mb = rte_mbuf_from_vlib_buffer (b0);
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb, b0, &next0, &error0);
+
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ next0 = xd->per_interface_next_index;
+ else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ else
+ dpdk_rx_next_from_mb (mb, b0, &next0);
+ dpdk_rx_error_from_mb (mb, &next0, &error0);
+
vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->queue_index = queue_id;
@@ -188,7 +174,8 @@ dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
static_always_inline void
-dpdk_process_subseq_segs (vlib_main_t *vm, vlib_buffer_t *b, struct rte_mbuf * mb, vlib_buffer_free_list_t *fl)
+dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
+ struct rte_mbuf *mb, vlib_buffer_free_list_t * fl)
{
u8 nb_seg = 1;
struct rte_mbuf *mb_seg = 0;
@@ -228,20 +215,18 @@ dpdk_process_subseq_segs (vlib_main_t *vm, vlib_buffer_t *b, struct rte_mbuf * m
static_always_inline void
dpdk_prefetch_buffer (struct rte_mbuf *mb)
{
- vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
- CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
+ CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE);
}
/*
* This function is used when there are no worker threads.
* The main thread performs IO and forwards the packets.
*/
-static inline u32
-dpdk_device_input (dpdk_main_t * dm,
- dpdk_device_t * xd,
- vlib_node_runtime_t * node,
- u32 cpu_index, u16 queue_id)
+static_always_inline u32
+dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
+ vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id)
{
u32 n_buffers;
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
@@ -275,9 +260,10 @@ dpdk_device_input (dpdk_main_t * dm,
while (n--)
{
- struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
- vlib_buffer_t * b = vlib_buffer_from_rte_mbuf (mb);
- vec_add1 (xd->d_trace_buffers[cpu_index], vlib_get_buffer_index (vm, b));
+ struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
+ vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
+ vec_add1 (xd->d_trace_buffers[cpu_index],
+ vlib_get_buffer_index (vm, b));
}
}
@@ -293,6 +279,7 @@ dpdk_device_input (dpdk_main_t * dm,
u32 bi2, next2, l3_offset2;
u32 bi3, next3, l3_offset3;
u8 error0, error1, error2, error3;
+ u64 or_ol_flags;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -325,6 +312,8 @@ dpdk_device_input (dpdk_main_t * dm,
ASSERT (mb2);
ASSERT (mb3);
+ or_ol_flags = (mb0->ol_flags | mb1->ol_flags |
+ mb2->ol_flags | mb3->ol_flags);
b0 = vlib_buffer_from_rte_mbuf (mb0);
b1 = vlib_buffer_from_rte_mbuf (mb1);
b2 = vlib_buffer_from_rte_mbuf (mb2);
@@ -344,28 +333,58 @@ dpdk_device_input (dpdk_main_t * dm,
to_next[1] = bi1;
to_next[2] = bi2;
to_next[3] = bi3;
- to_next+=4;
- n_left_to_next-=4;
+ to_next += 4;
+ n_left_to_next -= 4;
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb0, b0, &next0, &error0);
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb1, b1, &next1, &error1);
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb2, b2, &next2, &error2);
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb3, b3, &next3, &error3);
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ {
+ next0 = next1 = next2 = next3 = xd->per_interface_next_index;
+ }
+ else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+ {
+ next0 = next1 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ next2 = next3 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ }
+ else
+ {
+ dpdk_rx_next_from_mb (mb0, b0, &next0);
+ dpdk_rx_next_from_mb (mb1, b1, &next1);
+ dpdk_rx_next_from_mb (mb2, b2, &next2);
+ dpdk_rx_next_from_mb (mb3, b3, &next3);
+ }
- b0->error = node->errors[error0];
- b1->error = node->errors[error1];
- b2->error = node->errors[error2];
- b3->error = node->errors[error3];
+ if (PREDICT_FALSE (or_ol_flags & (PKT_RX_IP_CKSUM_BAD |
+ PKT_RX_L4_CKSUM_BAD)))
+ {
+ dpdk_rx_error_from_mb (mb0, &next0, &error0);
+ dpdk_rx_error_from_mb (mb1, &next1, &error1);
+ dpdk_rx_error_from_mb (mb2, &next2, &error2);
+ dpdk_rx_error_from_mb (mb3, &next3, &error3);
+ b0->error = node->errors[error0];
+ b1->error = node->errors[error1];
+ b2->error = node->errors[error2];
+ b3->error = node->errors[error3];
+ }
+ else
+ {
+ b0->error = b1->error = node->errors[DPDK_ERROR_NONE];
+ b2->error = b3->error = node->errors[DPDK_ERROR_NONE];
+ }
l3_offset0 = device_input_next_node_advance[next0];
l3_offset1 = device_input_next_node_advance[next1];
l3_offset2 = device_input_next_node_advance[next2];
l3_offset3 = device_input_next_node_advance[next3];
- b0->current_data = l3_offset0 + mb0->data_off - RTE_PKTMBUF_HEADROOM;
- b1->current_data = l3_offset1 + mb1->data_off - RTE_PKTMBUF_HEADROOM;
- b2->current_data = l3_offset2 + mb2->data_off - RTE_PKTMBUF_HEADROOM;
- b3->current_data = l3_offset3 + mb3->data_off - RTE_PKTMBUF_HEADROOM;
+ b0->current_data = l3_offset0 + mb0->data_off;
+ b1->current_data = l3_offset1 + mb1->data_off;
+ b2->current_data = l3_offset2 + mb2->data_off;
+ b3->current_data = l3_offset3 + mb3->data_off;
+
+ b0->current_data -= RTE_PKTMBUF_HEADROOM;
+ b1->current_data -= RTE_PKTMBUF_HEADROOM;
+ b2->current_data -= RTE_PKTMBUF_HEADROOM;
+ b3->current_data -= RTE_PKTMBUF_HEADROOM;
b0->current_length = mb0->data_len - l3_offset0;
b1->current_length = mb1->data_len - l3_offset1;
@@ -411,7 +430,7 @@ dpdk_device_input (dpdk_main_t * dm,
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
- /* Do we have any driver RX features configured on the interface? */
+ /* Do we have any driver RX features configured on the interface? */
vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index,
&next0, &next1, &next2, &next3,
b0, b1, b2, b3,
@@ -422,8 +441,8 @@ dpdk_device_input (dpdk_main_t * dm,
to_next, n_left_to_next,
bi0, bi1, bi2, bi3,
next0, next1, next2, next3);
- n_buffers-=4;
- mb_index+=4;
+ n_buffers -= 4;
+ mb_index += 4;
}
while (n_buffers > 0 && n_left_to_next > 0)
{
@@ -445,8 +464,13 @@ dpdk_device_input (dpdk_main_t * dm,
to_next++;
n_left_to_next--;
- dpdk_rx_next_and_error_from_mb_flags_x1 (xd, mb0, b0,
- &next0, &error0);
+ if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
+ next0 = xd->per_interface_next_index;
+ else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF))
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ else
+ dpdk_rx_next_from_mb (mb0, b0, &next0);
+ dpdk_rx_error_from_mb (mb0, &next0, &error0);
b0->error = node->errors[error0];
l3_offset0 = device_input_next_node_advance[next0];
@@ -471,8 +495,9 @@ dpdk_device_input (dpdk_main_t * dm,
*/
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
- /* Do we have any driver RX features configured on the interface? */
- vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, b0, l3_offset0);
+ /* Do we have any driver RX features configured on the interface? */
+ vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0,
+ b0, l3_offset0);
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
@@ -487,8 +512,8 @@ dpdk_device_input (dpdk_main_t * dm,
{
dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index],
vec_len (xd->d_trace_buffers[cpu_index]));
- vlib_set_trace_count (vm, node,
- n_trace - vec_len (xd->d_trace_buffers[cpu_index]));
+ vlib_set_trace_count (vm, node, n_trace -
+ vec_len (xd->d_trace_buffers[cpu_index]));
}
vlib_increment_combined_counter
@@ -578,33 +603,6 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
vec_foreach (dq, dm->devices_by_cpu[cpu_index])
{
xd = vec_elt_at_index(dm->devices, dq->device);
- ASSERT(dq->queue_id == 0);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0);
- }
- /* *INDENT-ON* */
-
- poll_rate_limit (dm);
-
- return n_rx_packets;
-}
-
-uword
-dpdk_input_rss (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- uword n_rx_packets = 0;
- dpdk_device_and_queue_t *dq;
- u32 cpu_index = os_get_cpu_number ();
-
- /*
- * Poll all devices on this cpu for input/interrupts.
- */
- /* *INDENT-OFF* */
- vec_foreach (dq, dm->devices_by_cpu[cpu_index])
- {
- xd = vec_elt_at_index(dm->devices, dq->device);
n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
}
/* *INDENT-ON* */
@@ -631,12 +629,13 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
.error_strings = dpdk_error_strings,
};
+VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
+/* *INDENT-ON* */
-/* handle dpdk_input_rss alternative function */
-VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input)
-VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss)
-
-/* this macro defines dpdk_input_rss_multiarch_select() */
-CLIB_MULTIARCH_SELECT_FN(dpdk_input);
-CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss);
-
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */