From 04f3db3847d242857b9d9d858bcdca538a1be7d7 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 10 Nov 2017 21:55:45 +0100 Subject: dpdk: introduce AVX512 variants of node functions Change-Id: If581feca0d51d0420c971801aecdf9250c671b36 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/device.c | 30 +++++++++++++++---- src/plugins/dpdk/device/node.c | 64 ++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 48 deletions(-) (limited to 'src/plugins/dpdk/device') diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index aa134327373..987596ead9b 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -38,6 +38,7 @@ typedef enum DPDK_TX_FUNC_N_ERROR, } dpdk_tx_func_error_t; +#ifndef CLIB_MULTIARCH_VARIANT static char *dpdk_tx_func_error_strings[] = { #define _(n,s) s, foreach_dpdk_tx_func_error @@ -65,8 +66,9 @@ dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) return NULL; } } +#endif -struct rte_mbuf * +static struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) { dpdk_main_t *dm = &dpdk_main; @@ -368,9 +370,10 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b, * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal * which calls the dpdk tx_burst function. */ -static uword -dpdk_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) +uword +CLIB_MULTIARCH_FN (dpdk_interface_tx) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) { dpdk_main_t *dm = &dpdk_main; vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; @@ -632,6 +635,7 @@ dpdk_interface_tx (vlib_main_t * vm, return tx_pkts; } +#ifndef CLIB_MULTIARCH_VARIANT static void dpdk_clear_hw_interface_counters (u32 instance) { @@ -789,12 +793,25 @@ VNET_DEVICE_CLASS (dpdk_device_class) = { .rx_redirect_to_node = dpdk_set_interface_next_node, .mac_addr_change_function = dpdk_set_mac_address, }; - -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) /* *INDENT-ON* */ +#if __x86_64__ +vlib_node_function_t __clib_weak dpdk_interface_tx_avx512; +vlib_node_function_t __clib_weak dpdk_interface_tx_avx2; +static void __clib_constructor +dpdk_interface_tx_multiarch_select (void) +{ + if (dpdk_interface_tx_avx512 && clib_cpu_supports_avx512f ()) + dpdk_device_class.tx_function = dpdk_interface_tx_avx512; + else if (dpdk_interface_tx_avx2 && clib_cpu_supports_avx2 ()) + dpdk_device_class.tx_function = dpdk_interface_tx_avx2; +} +#endif +#endif + #define UP_DOWN_FLAG_EVENT 1 +#ifndef CLIB_MULTIARCH_VARIANT uword admin_up_down_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -846,6 +863,7 @@ VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { .process_log2_n_stack_bytes = 17, // 256KB }; /* *INDENT-ON* */ +#endif /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 82978216817..1240b8d01a7 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -28,11 +28,13 @@ #include +#ifndef CLIB_MULTIARCH_VARIANT static char *dpdk_error_strings[] = { #define _(n,s) s, foreach_dpdk_error #undef _ }; +#endif always_inline int vlib_buffer_is_ip4 (vlib_buffer_t * b) @@ -259,43 +261,6 @@ dpdk_prefetch_ethertype (struct rte_mbuf *mb) CLIB_CACHE_LINE_BYTES, LOAD); } - -/* - This function should fill 1st cacheline of vlib_buffer_t metadata with data - from buffer template. Instead of filling field by field, we construct - template and then use 128/256 bit vector instruction to copy data. - This code first loads whole cacheline into 4 128-bit registers (xmm) - or two 256 bit registers (ymm) and then stores data into all 4 buffers - efectively saving on register load operations. -*/ - -static_always_inline void -dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3, - void *s) -{ -#if defined(CLIB_HAVE_VEC128) - int i; - for (i = 0; i < 2; i++) - { - *(u8x32 *) (((u8 *) d0) + i * 32) = - *(u8x32 *) (((u8 *) d1) + i * 32) = - *(u8x32 *) (((u8 *) d2) + i * 32) = - *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32); - } -#elif defined(CLIB_HAVE_VEC64) - int i; - for (i = 0; i < 4; i++) - { - *(u8x16 *) (((u8 *) d0) + i * 16) = - *(u8x16 *) (((u8 *) d1) + i * 16) = - *(u8x16 *) (((u8 *) d2) + i * 16) = - *(u8x16 *) (((u8 *) d3) + i * 16) = *(u8x16 *) (((u8 *) s) + i * 16); - } -#else -#error "Either CLIB_HAVE_VEC128 or CLIB_HAVE_VEC64 has to be defined" -#endif -} - /* * This function is used when there are no worker threads. * The main thread performs IO and forwards the packets. @@ -401,7 +366,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, b2 = vlib_buffer_from_rte_mbuf (mb2); b3 = vlib_buffer_from_rte_mbuf (mb3); - dpdk_buffer_init_from_template (b0, b1, b2, b3, bt); + clib_memcpy64_x4 (b0, b1, b2, b3, bt); dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]); dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]); @@ -647,8 +612,9 @@ poll_rate_limit (dpdk_main_t * dm) xd->per_interface_next_index */ -static uword -dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +uword +CLIB_MULTIARCH_FN (dpdk_input) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * f) { dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd; @@ -678,6 +644,7 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) return n_rx_packets; } +#ifndef CLIB_MULTIARCH_VARIANT /* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_input_node) = { .function = dpdk_input, @@ -694,10 +661,23 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { .n_errors = DPDK_N_ERROR, .error_strings = dpdk_error_strings, }; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); /* *INDENT-ON* */ +vlib_node_function_t __clib_weak dpdk_input_avx512; +vlib_node_function_t __clib_weak dpdk_input_avx2; + +#if __x86_64__ +static void __clib_constructor +dpdk_input_multiarch_select (void) +{ + if (dpdk_input_avx512 && clib_cpu_supports_avx512f ()) + dpdk_input_node.function = dpdk_input_avx512; + else if (dpdk_input_avx2 && clib_cpu_supports_avx2 ()) + dpdk_input_node.function = dpdk_input_avx2; +} +#endif +#endif + /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg