aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/dpdk
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2017-11-10 21:55:45 +0100
committerDave Barach <openvpp@barachs.net>2017-11-13 16:14:50 +0000
commit04f3db3847d242857b9d9d858bcdca538a1be7d7 (patch)
treee0304ad2a3986698d62e9a164f5a0a28565cef01 /src/plugins/dpdk
parent2d8bf304230102a6d9b312b98315418617798175 (diff)
dpdk: introduce AVX512 variants of node functions
Change-Id: If581feca0d51d0420c971801aecdf9250c671b36 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins/dpdk')
-rw-r--r--src/plugins/dpdk/buffer.c64
-rw-r--r--src/plugins/dpdk/device/device.c30
-rw-r--r--src/plugins/dpdk/device/node.c64
3 files changed, 98 insertions, 60 deletions
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 555b1109c19..80c6442f463 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -124,6 +124,7 @@ next:
}
}
+#ifndef CLIB_MULTIARCH_VARIANT
static void
del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
{
@@ -176,6 +177,7 @@ dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
pool_put (bm->buffer_free_list_pool, f);
}
}
+#endif
/* Make sure free list has at least given number of free buffers. */
static uword
@@ -253,10 +255,7 @@ fill_free_list (vlib_main_t * vm,
fl->buffers[f++] = bi2;
fl->buffers[f++] = bi3;
- clib_memcpy (b0, &bt, sizeof (vlib_buffer_t));
- clib_memcpy (b1, &bt, sizeof (vlib_buffer_t));
- clib_memcpy (b2, &bt, sizeof (vlib_buffer_t));
- clib_memcpy (b3, &bt, sizeof (vlib_buffer_t));
+ clib_memcpy64_x4 (b0, b1, b2, b3, &bt);
if (fl->buffer_init_function)
{
@@ -317,7 +316,8 @@ alloc_from_free_list (vlib_main_t * vm,
Returns number actually allocated which will be either zero or
number requested. */
u32
-dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+CLIB_MULTIARCH_FN (dpdk_buffer_alloc) (vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers)
{
vlib_buffer_main_t *bm = vm->buffer_main;
@@ -330,9 +330,10 @@ dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
u32
-dpdk_buffer_alloc_from_free_list (vlib_main_t * vm,
- u32 * buffers,
- u32 n_buffers, u32 free_list_index)
+CLIB_MULTIARCH_FN (dpdk_buffer_alloc_from_free_list) (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 free_list_index)
{
vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_free_list_t *f;
@@ -455,20 +456,23 @@ vlib_buffer_free_inline (vlib_main_t * vm,
}
}
-static void
-dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+void
+CLIB_MULTIARCH_FN (dpdk_buffer_free) (vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers)
{
vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
1);
}
-static void
-dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+void
+CLIB_MULTIARCH_FN (dpdk_buffer_free_no_next) (vlib_main_t * vm, u32 * buffers,
+ u32 n_buffers)
{
vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */
0);
}
+#ifndef CLIB_MULTIARCH_VARIANT
static void
dpdk_packet_template_init (vlib_main_t * vm,
void *vt,
@@ -682,6 +686,42 @@ VLIB_BUFFER_REGISTER_CALLBACKS (dpdk, static) = {
};
/* *INDENT-ON* */
+#if __x86_64__
+/*
+ * Optional per-microarchitecture builds of the buffer callbacks.
+ *
+ * The symbol names follow the <function>_<variant> pattern that
+ * CLIB_MULTIARCH_FN is paired with in this file (dpdk_buffer_alloc ->
+ * dpdk_buffer_alloc_avx2), so the free variants are
+ * dpdk_buffer_free_<variant> and dpdk_buffer_free_no_next_<variant>.
+ * Declaring them under any other name would leave the symbols unresolved
+ * and a null callback would be installed below.
+ *
+ * NOTE(review): __clib_weak is presumably a weak-linkage attribute, which
+ * is why a variant that was not built can be detected by a null test.
+ */
+vlib_buffer_alloc_cb_t __clib_weak dpdk_buffer_alloc_avx512;
+vlib_buffer_alloc_cb_t __clib_weak dpdk_buffer_alloc_avx2;
+vlib_buffer_alloc_from_free_list_cb_t __clib_weak
+  dpdk_buffer_alloc_from_free_list_avx512;
+vlib_buffer_alloc_from_free_list_cb_t __clib_weak
+  dpdk_buffer_alloc_from_free_list_avx2;
+vlib_buffer_free_cb_t __clib_weak dpdk_buffer_free_avx512;
+vlib_buffer_free_cb_t __clib_weak dpdk_buffer_free_avx2;
+vlib_buffer_free_no_next_cb_t __clib_weak dpdk_buffer_free_no_next_avx512;
+vlib_buffer_free_no_next_cb_t __clib_weak dpdk_buffer_free_no_next_avx2;
+
+/*
+ * Replace the default dpdk buffer callbacks with the best variant that is
+ * both present in the binary and supported by the running CPU. AVX512 is
+ * preferred over AVX2; if neither qualifies the registered defaults stay.
+ *
+ * A variant is only installed when all four of its entry points are
+ * present, so a callback slot can never be overwritten with null.
+ */
+static void __clib_constructor
+dpdk_input_multiarch_select (void)
+{
+  vlib_buffer_callbacks_t *cb = &__dpdk_buffer_callbacks;
+
+  if (dpdk_buffer_alloc_avx512
+      && dpdk_buffer_alloc_from_free_list_avx512
+      && dpdk_buffer_free_avx512
+      && dpdk_buffer_free_no_next_avx512 && clib_cpu_supports_avx512f ())
+    {
+      cb->vlib_buffer_alloc_cb = dpdk_buffer_alloc_avx512;
+      cb->vlib_buffer_alloc_from_free_list_cb =
+	dpdk_buffer_alloc_from_free_list_avx512;
+      cb->vlib_buffer_free_cb = dpdk_buffer_free_avx512;
+      cb->vlib_buffer_free_no_next_cb = dpdk_buffer_free_no_next_avx512;
+    }
+  else if (dpdk_buffer_alloc_avx2
+	   && dpdk_buffer_alloc_from_free_list_avx2
+	   && dpdk_buffer_free_avx2
+	   && dpdk_buffer_free_no_next_avx2 && clib_cpu_supports_avx2 ())
+    {
+      cb->vlib_buffer_alloc_cb = dpdk_buffer_alloc_avx2;
+      cb->vlib_buffer_alloc_from_free_list_cb =
+	dpdk_buffer_alloc_from_free_list_avx2;
+      cb->vlib_buffer_free_cb = dpdk_buffer_free_avx2;
+      cb->vlib_buffer_free_no_next_cb = dpdk_buffer_free_no_next_avx2;
+    }
+}
+#endif
+#endif
+
/** @endcond */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index aa134327373..987596ead9b 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -38,6 +38,7 @@ typedef enum
DPDK_TX_FUNC_N_ERROR,
} dpdk_tx_func_error_t;
+#ifndef CLIB_MULTIARCH_VARIANT
static char *dpdk_tx_func_error_strings[] = {
#define _(n,s) s,
foreach_dpdk_tx_func_error
@@ -65,8 +66,9 @@ dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
return NULL;
}
}
+#endif
-struct rte_mbuf *
+static struct rte_mbuf *
dpdk_replicate_packet_mb (vlib_buffer_t * b)
{
dpdk_main_t *dm = &dpdk_main;
@@ -368,9 +370,10 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
* rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal
* which calls the dpdk tx_burst function.
*/
-static uword
-dpdk_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
+uword
+CLIB_MULTIARCH_FN (dpdk_interface_tx) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
{
dpdk_main_t *dm = &dpdk_main;
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
@@ -632,6 +635,7 @@ dpdk_interface_tx (vlib_main_t * vm,
return tx_pkts;
}
+#ifndef CLIB_MULTIARCH_VARIANT
static void
dpdk_clear_hw_interface_counters (u32 instance)
{
@@ -789,12 +793,25 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
.rx_redirect_to_node = dpdk_set_interface_next_node,
.mac_addr_change_function = dpdk_set_mac_address,
};
-
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
/* *INDENT-ON* */
+#if __x86_64__
+vlib_node_function_t __clib_weak dpdk_interface_tx_avx512;
+vlib_node_function_t __clib_weak dpdk_interface_tx_avx2;
+/*
+ * Point the dpdk device class at the best TX function variant that is
+ * both present in the binary and supported by the running CPU. AVX512 is
+ * tried first, then AVX2; otherwise tx_function is left untouched.
+ */
+static void __clib_constructor
+dpdk_interface_tx_multiarch_select (void)
+{
+  if (dpdk_interface_tx_avx512 && clib_cpu_supports_avx512f ())
+    {
+      dpdk_device_class.tx_function = dpdk_interface_tx_avx512;
+      return;
+    }
+
+  if (dpdk_interface_tx_avx2 && clib_cpu_supports_avx2 ())
+    {
+      dpdk_device_class.tx_function = dpdk_interface_tx_avx2;
+    }
+}
+#endif
+#endif
+
#define UP_DOWN_FLAG_EVENT 1
+#ifndef CLIB_MULTIARCH_VARIANT
uword
admin_up_down_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -846,6 +863,7 @@ VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
.process_log2_n_stack_bytes = 17, // 256KB
};
/* *INDENT-ON* */
+#endif
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 82978216817..1240b8d01a7 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -28,11 +28,13 @@
#include <dpdk/device/dpdk_priv.h>
+#ifndef CLIB_MULTIARCH_VARIANT
static char *dpdk_error_strings[] = {
#define _(n,s) s,
foreach_dpdk_error
#undef _
};
+#endif
always_inline int
vlib_buffer_is_ip4 (vlib_buffer_t * b)
@@ -259,43 +261,6 @@ dpdk_prefetch_ethertype (struct rte_mbuf *mb)
CLIB_CACHE_LINE_BYTES, LOAD);
}
-
-/*
- This function should fill 1st cacheline of vlib_buffer_t metadata with data
- from buffer template. Instead of filling field by field, we construct
- template and then use 128/256 bit vector instruction to copy data.
- This code first loads whole cacheline into 4 128-bit registers (xmm)
- or two 256 bit registers (ymm) and then stores data into all 4 buffers
- efectively saving on register load operations.
-*/
-
-static_always_inline void
-dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3,
- void *s)
-{
-#if defined(CLIB_HAVE_VEC128)
- int i;
- for (i = 0; i < 2; i++)
- {
- *(u8x32 *) (((u8 *) d0) + i * 32) =
- *(u8x32 *) (((u8 *) d1) + i * 32) =
- *(u8x32 *) (((u8 *) d2) + i * 32) =
- *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32);
- }
-#elif defined(CLIB_HAVE_VEC64)
- int i;
- for (i = 0; i < 4; i++)
- {
- *(u8x16 *) (((u8 *) d0) + i * 16) =
- *(u8x16 *) (((u8 *) d1) + i * 16) =
- *(u8x16 *) (((u8 *) d2) + i * 16) =
- *(u8x16 *) (((u8 *) d3) + i * 16) = *(u8x16 *) (((u8 *) s) + i * 16);
- }
-#else
-#error "Either CLIB_HAVE_VEC128 or CLIB_HAVE_VEC64 has to be defined"
-#endif
-}
-
/*
* This function is used when there are no worker threads.
* The main thread performs IO and forwards the packets.
@@ -401,7 +366,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
b2 = vlib_buffer_from_rte_mbuf (mb2);
b3 = vlib_buffer_from_rte_mbuf (mb3);
- dpdk_buffer_init_from_template (b0, b1, b2, b3, bt);
+ clib_memcpy64_x4 (b0, b1, b2, b3, bt);
dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]);
dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]);
@@ -647,8 +612,9 @@ poll_rate_limit (dpdk_main_t * dm)
<code>xd->per_interface_next_index</code>
*/
-static uword
-dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+uword
+CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * f)
{
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd;
@@ -678,6 +644,7 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
return n_rx_packets;
}
+#ifndef CLIB_MULTIARCH_VARIANT
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_input_node) = {
.function = dpdk_input,
@@ -694,10 +661,23 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
.n_errors = DPDK_N_ERROR,
.error_strings = dpdk_error_strings,
};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
/* *INDENT-ON* */
+vlib_node_function_t __clib_weak dpdk_input_avx512;
+vlib_node_function_t __clib_weak dpdk_input_avx2;
+
+#if __x86_64__
+/*
+ * Choose the dpdk-input node function variant for the running CPU.
+ * The AVX512 build wins over the AVX2 build; when neither is present and
+ * supported, the function set in the node registration is kept.
+ */
+static void __clib_constructor
+dpdk_input_multiarch_select (void)
+{
+  vlib_node_function_t *variant = 0;
+
+  if (dpdk_input_avx512 && clib_cpu_supports_avx512f ())
+    variant = dpdk_input_avx512;
+  else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ())
+    variant = dpdk_input_avx2;
+
+  /* Only override the registered function when a variant was selected. */
+  if (variant)
+    dpdk_input_node.function = variant;
+}
+#endif
+#endif
+
/*
* fd.io coding-style-patch-verification: ON
*