diff options
author | Damjan Marion <damarion@cisco.com> | 2023-09-29 15:09:11 +0200 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2023-10-03 12:19:33 +0000 |
commit | 029bff4b9a60ceabad8744059427b8736fe48a2b (patch) | |
tree | 3f4282d1bda61792e451a98d460c5eaf831ad258 /src | |
parent | 1457828c596c56f271d71f9a56c39728ee9e4c67 (diff) |
vppinfra: splat and gather vector inlines
Type: improvement
Change-Id: I4b00b3a6ff63fc8b313c89217ccdea356c0783a3
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/vppinfra/vector_avx2.h | 18 | ||||
-rw-r--r-- | src/vppinfra/vector_avx512.h | 6 |
2 files changed, 24 insertions, 0 deletions
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index 17271b8fcd0..ee3d5404f91 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -335,6 +335,18 @@ u32x8_scatter_one (u32x8 r, int index, void *p) *(u32 *) p = r[index]; } +#define u32x8_gather_u32(base, indices, scale) \ + (u32x8) _mm256_i32gather_epi32 (base, (__m256i) indices, scale) + +#ifdef __AVX512F__ +#define u32x8_scatter_u32(base, indices, v, scale) \ + _mm256_i32scatter_epi32 (base, (__m256i) indices, (__m256i) v, scale) +#else +#define u32x8_scatter_u32(base, indices, v, scale) \ + for (u32 i = 0; i < 8; i++) \ + *((u32u *) ((u8 *) base + (scale) * (indices)[i])) = (v)[i]; +#endif + static_always_inline u8x32 u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask) { @@ -428,6 +440,12 @@ u32x8_splat_u32x4 (u32x4 a) return (u32x8) _mm256_broadcastsi128_si256 ((__m128i) a); } +static_always_inline u64x4 +u64x4_splat_u64x2 (u64x2 a) +{ + return (u64x4) _mm256_broadcastsi128_si256 ((__m128i) a); +} + static_always_inline u8x32 u8x32_load_partial (u8 *data, uword n) { diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h index b745b46fd73..f15a04ec128 100644 --- a/src/vppinfra/vector_avx512.h +++ b/src/vppinfra/vector_avx512.h @@ -326,6 +326,12 @@ u32x16_splat_u32x4 (u32x4 a) return (u32x16) _mm512_broadcast_i64x2 ((__m128i) a); } +static_always_inline u64x8 +u64x8_splat_u64x2 (u64x2 a) +{ + return (u64x8) _mm512_broadcast_i64x2 ((__m128i) a); +} + static_always_inline u32x16 u32x16_mask_blend (u32x16 a, u32x16 b, u16 mask) { |