diff options
author | Damjan Marion <damarion@cisco.com> | 2021-04-28 19:31:22 +0200 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2021-04-28 19:00:37 +0000 |
commit | 3295ddf6b6e06f43ebf1e081a09b7b785dd217ea (patch) | |
tree | 0f686e6877babf556e6605faed460975ad3dd6fc | |
parent | ea6236b376c397f5519ff2763702818a4bfe46d9 (diff) |
vppinfra: AArch64 NEON implementation of clib_compare_u16_x64()
Type: improvement
Change-Id: I1382813211ea20c6204d7a3b6e6d470aa51aed69
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- | src/vlib/buffer_funcs.c | 2 | ||||
-rw-r--r-- | src/vppinfra/vector_funcs.h | 21 |
2 files changed, 21 insertions, 2 deletions
diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c index eaf141e5e9a..a0edd7ec60b 100644 --- a/src/vlib/buffer_funcs.c +++ b/src/vlib/buffer_funcs.c @@ -3,8 +3,8 @@ */ #include <vppinfra/clib.h> -#include <vppinfra/vector_funcs.h> #include <vlib/vlib.h> +#include <vppinfra/vector_funcs.h> typedef struct { diff --git a/src/vppinfra/vector_funcs.h b/src/vppinfra/vector_funcs.h index c8670662910..2b02d9eb301 100644 --- a/src/vppinfra/vector_funcs.h +++ b/src/vppinfra/vector_funcs.h @@ -17,7 +17,7 @@ static_always_inline u64 clib_compare_u16_x64 (u16 v, u16 *a) { u64 mask = 0; -#if defined(CLIB_HAVE_VEC512) && !defined(__aarch64__) +#if defined(CLIB_HAVE_VEC512) u16x32 v32 = u16x32_splat (v); u16x32u *av = (u16x32u *) a; mask = ((u64) u16x32_is_equal_mask (av[0], v32) | @@ -31,6 +31,25 @@ clib_compare_u16_x64 (u16 v, u16 *a) mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)); x = i16x16_pack (v16 == av[2], v16 == av[3]); mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32; +#elif defined(CLIB_HAVE_VEC128) && defined(__ARM_NEON) + u16x8 idx8 = u16x8_splat (v); + u16x8 m = { 1, 2, 4, 8, 16, 32, 64, 128 }; + u16x8u *av = (u16x8u *) a; + + /* compare each u16 element with idx8, result gives 0xffff in each element + of the resulting vector if comparison result is true. + Bitwise AND with m will give us one bit set for true result and offset + of that bit represents element index. Finally vaddvq_u16() gives us sum + of all elements of the vector which will give us u8 bitmap. 
*/ + + mask = ((u64) vaddvq_u16 ((av[0] == idx8) & m) | + (u64) vaddvq_u16 ((av[1] == idx8) & m) << 8 | + (u64) vaddvq_u16 ((av[2] == idx8) & m) << 16 | + (u64) vaddvq_u16 ((av[3] == idx8) & m) << 24 | + (u64) vaddvq_u16 ((av[4] == idx8) & m) << 32 | + (u64) vaddvq_u16 ((av[5] == idx8) & m) << 40 | + (u64) vaddvq_u16 ((av[6] == idx8) & m) << 48 | + (u64) vaddvq_u16 ((av[7] == idx8) & m) << 56); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u16x8 idx8 = u16x8_splat (v); u16x8u *av = (u16x8u *) a; |