From efd6de87d3268f1ab499799cefb2e8b32a613f79 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 2 Dec 2021 13:02:38 +0100 Subject: vppinfra: vector shuffle cleanup Type: refactor Change-Id: I8b3fc2ce30df313467274a174c5ac6adbf296153 Signed-off-by: Damjan Marion --- src/vppinfra/vector.h | 62 ++++++++++++++++++++++++++++++++++++++++++++ src/vppinfra/vector_avx2.h | 6 ----- src/vppinfra/vector_avx512.h | 6 ----- src/vppinfra/vector_neon.h | 6 ----- src/vppinfra/vector_sse42.h | 18 ------------- 5 files changed, 62 insertions(+), 36 deletions(-) (limited to 'src/vppinfra') diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h index 88cf288cb26..49bc2976b3e 100644 --- a/src/vppinfra/vector.h +++ b/src/vppinfra/vector.h @@ -126,6 +126,68 @@ foreach_vec #undef _vector_size + /* _shuffle and _shuffle2 */ +#if defined(__GNUC__) && !defined(__clang__) +#define __builtin_shufflevector(v1, v2, ...) \ + __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ }) +#endif + +#define u8x16_shuffle(v1, ...) \ + (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__) +#define u8x32_shuffle(v1, ...) \ + (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__) +#define u8x64_shuffle(v1, ...) \ + (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__) + +#define u16x8_shuffle(v1, ...) \ + (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__) +#define u16x16_shuffle(v1, ...) \ + (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__) +#define u16x32_shuffle(v1, ...) \ + (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__); + +#define u32x4_shuffle(v1, ...) \ + (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__) +#define u32x8_shuffle(v1, ...) \ + (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__) +#define u32x16_shuffle(v1, ...) \ + (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__) + +#define u64x2_shuffle(v1, ...) \ + (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__) +#define u64x4_shuffle(v1, ...) \ + (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__) +#define u64x8_shuffle(v1, ...) \ + (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__) + +#define u8x16_shuffle2(v1, v2, ...) \ + (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__) +#define u8x32_shuffle2(v1, v2, ...) \ + (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__) +#define u8x64_shuffle2(v1, v2, ...) \ + (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__) + +#define u16x8_shuffle2(v1, v2, ...) \ + (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__) +#define u16x16_shuffle2(v1, v2, ...) \ + (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__) +#define u16x32_shuffle2(v1, v2, ...) \ + (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__); + +#define u32x4_shuffle2(v1, v2, ...) \ + (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__) +#define u32x8_shuffle2(v1, v2, ...) \ + (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__) +#define u32x16_shuffle2(v1, v2, ...) \ + (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__) + +#define u64x2_shuffle2(v1, v2, ...) \ + (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__) +#define u64x4_shuffle2(v1, v2, ...) \ + (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__) +#define u64x8_shuffle2(v1, v2, ...) \ + (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__) + #define VECTOR_WORD_TYPE(t) t##x #define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t)) diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index 7226c230e68..59857182a93 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -183,12 +183,6 @@ u16x16_byte_swap (u16x16 v) return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap); } -static_always_inline u8x32 -u8x32_shuffle (u8x32 v, u8x32 m) -{ - return (u8x32) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) m); -} - #define u8x32_align_right(a, b, imm) \ (u8x32) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, imm) diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h index 8acac2a3a9f..33f40ef7b5a 100644 --- a/src/vppinfra/vector_avx512.h +++ b/src/vppinfra/vector_avx512.h @@ -196,12 +196,6 @@ u8x64_reflect_u8x16 (u8x64 x) return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask); } -static_always_inline u8x64 -u8x64_shuffle (u8x64 v, u8x64 m) -{ - return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m); -} - #define u8x64_align_right(a, b, imm) \ (u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm) diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h index 80d7bda9f3c..e7b31259f73 100644 --- a/src/vppinfra/vector_neon.h +++ b/src/vppinfra/vector_neon.h @@ -129,12 +129,6 @@ u32x4_byte_swap (u32x4 v) return (u32x4) vrev32q_u8 ((u8x16) v); } -static_always_inline u8x16 -u8x16_shuffle (u8x16 v, u8x16 m) -{ - return (u8x16) vqtbl1q_u8 (v, m); -} - static_always_inline u32x4 u32x4_hadd (u32x4 v1, u32x4 v2) { diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h index 7e75ad28710..35495d6658f 100644 --- a/src/vppinfra/vector_sse42.h +++ b/src/vppinfra/vector_sse42.h @@ -411,24 +411,6 @@ u32x4_sum_elts (u32x4 sum4) return sum4[0]; } -static_always_inline u8x16 -u8x16_shuffle (u8x16 v, u8x16 m) -{ - return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) m); -} - -static_always_inline u32x4 -u32x4_shuffle (u32x4 v, const int a, const int b, const int c, const int d) -{ -#if defined(__clang__) || !__OPTIMIZE__ - u32x4 r = { v[a], v[b], v[c], v[d] }; - return r; -#else - return (u32x4) _mm_shuffle_epi32 ((__m128i) v, - a | b << 2 | c << 4 | d << 6); -#endif -} - /* _from_ */ /* *INDENT-OFF* */ #define _(f,t,i) \ -- cgit 1.2.3-korg