From e1480a2c12ff764622dd2ae1bc9bce6cd25bcbdd Mon Sep 17 00:00:00 2001
From: Radu Nicolau
Date: Thu, 14 Jan 2021 10:25:02 +0000
Subject: avf: use write combining store for queues tail update

Performance improvement: on supported platforms, currently only Intel
Tremont, use a write combining store to update the tail pointers.
Also, Tremont node variant is added for all.

Type: improvement

Signed-off-by: Radu Nicolau
Change-Id: Ie9606e403b7d9655184f778e3ffee3027c8c9edd
---
 src/cmake/cpu.cmake      |  4 ++++
 src/plugins/avf/avf.h    | 11 ++++++++++
 src/plugins/avf/input.c  |  2 +-
 src/plugins/avf/output.c |  2 +-
 src/vppinfra/cpu.h       | 55 ++++++++++++++++++++++++++++--------------------
 5 files changed, 49 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake
index 37bc24c09b5..b3f9f0e4330 100644
--- a/src/cmake/cpu.cmake
+++ b/src/cmake/cpu.cmake
@@ -80,6 +80,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
   if(compiler_flag_march_haswell)
     list(APPEND MARCH_VARIANTS "hsw\;-march=haswell -mtune=haswell")
   endif()
+  check_c_compiler_flag("-march=tremont" compiler_flag_march_tremont)
+  if(compiler_flag_march_tremont)
+    list(APPEND MARCH_VARIANTS "trm\;-march=tremont -mtune=tremont")
+  endif()
   if (GNU_ASSEMBLER_AVX512_BUG)
     message(WARNING "AVX-512 multiarch variant(s) disabled due to GNU Assembler bug")
   else()
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index b3fcc259206..23cc36c4882 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -373,6 +373,17 @@ avf_reg_flush (avf_device_t * ad)
   asm volatile ("":::"memory");
 }
 
+static inline void
+avf_tail_write (volatile u32 *addr, u32 val)
+{
+#ifdef __MOVDIRI__
+  _mm_sfence ();
+  _directstoreu_u32 ((void *) addr, val);
+#else
+  clib_atomic_store_rel_n (addr, val);
+#endif
+}
+
 static_always_inline int
 avf_rxd_is_not_eop (avf_rx_desc_t * d)
 {
diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c
index 5041f6ef4e7..221b54b19a9 100644
--- a/src/plugins/avf/input.c
+++ b/src/plugins/avf/input.c
@@ -125,7 +125,7 @@ avf_rxq_refill (vlib_main_t * vm, vlib_node_runtime_t * node, avf_rxq_t * rxq,
       n_alloc -= 8;
     }
 
-  clib_atomic_store_rel_n (rxq->qrx_tail, slot);
+  avf_tail_write (rxq->qrx_tail, slot);
 }
 
 
diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c
index 952d151117c..c53fc42a850 100644
--- a/src/plugins/avf/output.c
+++ b/src/plugins/avf/output.c
@@ -441,7 +441,7 @@ avf_tx_enqueue (vlib_main_t * vm, vlib_node_runtime_t * node, avf_txq_t * txq,
     }
 
   txq->next = next & mask;
-  clib_atomic_store_rel_n (txq->qtx_tail, txq->next);
+  avf_tail_write (txq->qtx_tail, txq->next);
   txq->n_enqueued += n_desc;
   return n_packets - n_packets_left;
 }
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 750b3934df9..6a812180008 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -114,29 +114,30 @@ _CLIB_MARCH_FN_REGISTRATION(fn)
 clib_march_fn_registration *fn##_march_fn_registrations = 0; \
 _CLIB_MARCH_FN_REGISTRATION(fn)
 #endif
-#define foreach_x86_64_flags \
-_ (sse3, 1, ecx, 0) \
-_ (pclmulqdq, 1, ecx, 1) \
-_ (ssse3, 1, ecx, 9) \
-_ (sse41, 1, ecx, 19) \
-_ (sse42, 1, ecx, 20) \
-_ (avx, 1, ecx, 28) \
-_ (rdrand, 1, ecx, 30) \
-_ (avx2, 7, ebx, 5) \
-_ (rtm, 7, ebx, 11) \
-_ (pqm, 7, ebx, 12) \
-_ (pqe, 7, ebx, 15) \
-_ (avx512f, 7, ebx, 16) \
-_ (rdseed, 7, ebx, 18) \
-_ (x86_aes, 1, ecx, 25) \
-_ (sha, 7, ebx, 29) \
-_ (vaes, 7, ecx, 9) \
-_ (vpclmulqdq, 7, ecx, 10) \
-_ (avx512_vnni, 7, ecx, 11) \
-_ (avx512_bitalg, 7, ecx, 12) \
-_ (avx512_vpopcntdq, 7, ecx, 14) \
-_ (invariant_tsc, 0x80000007, edx, 8)
-
+#define foreach_x86_64_flags \
+  _ (sse3, 1, ecx, 0) \
+  _ (pclmulqdq, 1, ecx, 1) \
+  _ (ssse3, 1, ecx, 9) \
+  _ (sse41, 1, ecx, 19) \
+  _ (sse42, 1, ecx, 20) \
+  _ (avx, 1, ecx, 28) \
+  _ (rdrand, 1, ecx, 30) \
+  _ (avx2, 7, ebx, 5) \
+  _ (rtm, 7, ebx, 11) \
+  _ (pqm, 7, ebx, 12) \
+  _ (pqe, 7, ebx, 15) \
+  _ (avx512f, 7, ebx, 16) \
+  _ (rdseed, 7, ebx, 18) \
+  _ (x86_aes, 1, ecx, 25) \
+  _ (sha, 7, ebx, 29) \
+  _ (vaes, 7, ecx, 9) \
+  _ (vpclmulqdq, 7, ecx, 10) \
+  _ (avx512_vnni, 7, ecx, 11) \
+  _ (avx512_bitalg, 7, ecx, 12) \
+  _ (avx512_vpopcntdq, 7, ecx, 14) \
+  _ (movdiri, 7, ecx, 27) \
+  _ (movdir64b, 7, ecx, 28) \
+  _ (invariant_tsc, 0x80000007, edx, 8)
 
 #define foreach_aarch64_flags \
 _ (fp, 0) \
@@ -263,6 +264,14 @@ clib_cpu_march_priority_skx ()
   return -1;
 }
 
+static inline int
+clib_cpu_march_priority_trm ()
+{
+  if (clib_cpu_supports_movdiri ())
+    return 60;
+  return -1;
+}
+
 static inline int
 clib_cpu_march_priority_hsw ()
 {
-- 
cgit 1.2.3-korg