aboutsummaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector_avx512.h
diff options
context:
space:
mode:
authorZhiyong Yang <zhiyong.yang@intel.com>2020-07-08 20:28:36 +0000
committerJunfeng Wang <drenfong.wang@intel.com>2020-09-04 02:34:03 +0000
commit5e52417a2aa3b2063a811c6a9f293a79d73bcb43 (patch)
tree4b4cdaccaa682c1dddea8617717af5ac4f520ea0 /src/vppinfra/vector_avx512.h
parent4a433f46084d05a524154db64d3d7d2567305009 (diff)
ip: enhance vtep4_check of tunnel by vector way
This patch aims to improve decap performance for tunnels such as vxlan, vxlan_gpe, geneve and gtpu by making as few expensive hash_get calls as possible, using AVX512 on Xeon. In the existing code, if the vtep4 of the current packet matches the last vtep4_key_t, the expensive hash computation is avoided and the code returns directly. This patch greatly improves tunnel decap in the multiple-flow case by using a 512-bit vector register on Xeon that holds 8 vtep4_keys. It increases the possibility of avoiding unnecessary hash computation: the lookup is skipped whenever the hash key of the current packet hits any one of the 8 keys in the 512-bit cache. The oldest element in vtep4_cache_t is replaced in round-robin order. vlib_get_buffers is also used in this change. Type: improvement Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com> Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Signed-off-by: Junfeng Wang <drenfong.wang@intel.com> Change-Id: I313103202bd76f2dd638cd942554721b37ddad60
Diffstat (limited to 'src/vppinfra/vector_avx512.h')
-rw-r--r--src/vppinfra/vector_avx512.h6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index 6eb7c5eaa4d..a51644be1db 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -246,6 +246,12 @@ u8x64_mask_blend (u8x64 a, u8x64 b, u64 mask)
return (u8x64) _mm512_mask_blend_epi8 (mask, (__m512i) a, (__m512i) b);
}
+static_always_inline u8
+u64x8_mask_is_equal (u64x8 a, u64x8 b)
+{ /* Compare a and b with _mm512_cmpeq_epu64_mask and return the resulting mask as a u8. */
+ return _mm512_cmpeq_epu64_mask ((__m512i) a, (__m512i) b); /* NOTE(review): presumably one bit per 64-bit lane, set where the lanes of a and b are equal, so the result is a bitmask rather than a 0/1 flag -- confirm against the Intel intrinsics guide */
+}
+
static_always_inline void
u32x16_transpose (u32x16 m[16])
{