From a52e1668c9976bd5cdd20d02b668df41ea41f16f Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Sat, 19 May 2018 00:04:23 +0200 Subject: vector functions cleanup and improvements Remove functions which have native C equivalent (e.g. _is_equal can be replaced with ==, _add with +) Add SSE4.2, AVX-512 implementations of splat, load_unaligned, store_unaligned, is_all_zero, is_equal, is_all_equal Change-Id: Ie80b0e482e7a76248ad79399c2576468532354cd Signed-off-by: Damjan Marion --- src/vppinfra/vector_avx2.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/vppinfra/vector_avx2.h') diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index 3f0b397b828..1fb41dfd7df 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -19,6 +19,7 @@ #include #include +/* *INDENT-OFF* */ #define foreach_avx2_vec256i \ _(i,8,32,epi8) _(i,16,16,epi16) _(i,32,8,epi32) _(i,64,4,epi64x) #define foreach_avx2_vec256u \ @@ -26,7 +27,8 @@ #define foreach_avx2_vec256f \ _(f,32,8,ps) _(f,64,4,pd) -/* splat, load_unaligned, store_unaligned, is_all_zero, is_all_equal */ +/* splat, load_unaligned, store_unaligned, is_all_zero, is_equal, + is_all_equal */ #define _(t, s, c, i) \ static_always_inline t##s##x##c \ t##s##x##c##_splat (t##s x) \ @@ -45,13 +47,18 @@ t##s##x##c##_is_all_zero (t##s##x##c x) \ { return _mm256_testz_si256 ((__m256i) x, (__m256i) x); } \ \ static_always_inline int \ -t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \ -{ return t##s##x##c##_is_all_zero (v != t##s##x##c##_splat (x)); }; \ +t##s##x##c##_is_equal (t##s##x##c x, t##s##x##c y) \ +{ return _mm256_testc_si256 ((__m256i) x, (__m256i) y); } \ \ +static_always_inline int \ +t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \ +{ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); }; \ foreach_avx2_vec256i foreach_avx2_vec256u #undef _ - always_inline u32x8 +/* *INDENT-ON* */ + +always_inline u32x8 u32x8_permute (u32x8 v, u32x8 idx) { return 
(u32x8) _mm256_permutevar8x32_epi32 ((__m256i) v, (__m256i) idx); -- cgit 1.2.3-korg