aboutsummaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector_avx2.h
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-05-19 00:04:23 +0200
committerDamjan Marion <damarion@cisco.com>2018-05-20 14:52:39 +0200
commita52e1668c9976bd5cdd20d02b668df41ea41f16f (patch)
tree75ed44c2df7a88e0e3af39ae1c4de4cc49f36ad7 /src/vppinfra/vector_avx2.h
parent3b854a5cb876b5af4e69c56028e2c2824100f4b2 (diff)
vector functions cleanup and improvements
Remove functions which have a native C equivalent (e.g. _is_equal can be replaced with ==, _add with +). Add SSE4.2 and AVX-512 implementations of splat, load_unaligned, store_unaligned, is_all_zero, is_equal, is_all_equal. Change-Id: Ie80b0e482e7a76248ad79399c2576468532354cd Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/vector_avx2.h')
-rw-r--r--src/vppinfra/vector_avx2.h15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index 3f0b397b828..1fb41dfd7df 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -19,6 +19,7 @@
#include <vppinfra/clib.h>
#include <x86intrin.h>
+/* *INDENT-OFF* */
#define foreach_avx2_vec256i \
_(i,8,32,epi8) _(i,16,16,epi16) _(i,32,8,epi32) _(i,64,4,epi64x)
#define foreach_avx2_vec256u \
@@ -26,7 +27,8 @@
#define foreach_avx2_vec256f \
_(f,32,8,ps) _(f,64,4,pd)
-/* splat, load_unaligned, store_unaligned, is_all_zero, is_all_equal */
+/* splat, load_unaligned, store_unaligned, is_all_zero, is_equal,
+ is_all_equal */
#define _(t, s, c, i) \
static_always_inline t##s##x##c \
t##s##x##c##_splat (t##s x) \
@@ -45,13 +47,18 @@ t##s##x##c##_is_all_zero (t##s##x##c x) \
{ return _mm256_testz_si256 ((__m256i) x, (__m256i) x); } \
\
static_always_inline int \
-t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
-{ return t##s##x##c##_is_all_zero (v != t##s##x##c##_splat (x)); }; \
+t##s##x##c##_is_equal (t##s##x##c x, t##s##x##c y) \
+{ return t##s##x##c##_is_all_zero (x ^ y); /* XOR is all-zero iff x == y */ } \
\
+static_always_inline int \
+t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
+{ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); }; \
foreach_avx2_vec256i foreach_avx2_vec256u
#undef _
- always_inline u32x8
+/* *INDENT-ON* */
+
+always_inline u32x8
u32x8_permute (u32x8 v, u32x8 idx)
{
return (u32x8) _mm256_permutevar8x32_epi32 ((__m256i) v, (__m256i) idx);