From 3295ddf6b6e06f43ebf1e081a09b7b785dd217ea Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 28 Apr 2021 19:31:22 +0200 Subject: vppinfra: AArch64 NEON implementation of clib_compare_u16_x64() Type: improvement Change-Id: I1382813211ea20c6204d7a3b6e6d470aa51aed69 Signed-off-by: Damjan Marion --- src/vppinfra/vector_funcs.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src/vppinfra/vector_funcs.h') diff --git a/src/vppinfra/vector_funcs.h b/src/vppinfra/vector_funcs.h index c8670662910..2b02d9eb301 100644 --- a/src/vppinfra/vector_funcs.h +++ b/src/vppinfra/vector_funcs.h @@ -17,7 +17,7 @@ static_always_inline u64 clib_compare_u16_x64 (u16 v, u16 *a) { u64 mask = 0; -#if defined(CLIB_HAVE_VEC512) && !defined(__aarch64__) +#if defined(CLIB_HAVE_VEC512) u16x32 v32 = u16x32_splat (v); u16x32u *av = (u16x32u *) a; mask = ((u64) u16x32_is_equal_mask (av[0], v32) | @@ -31,6 +31,25 @@ clib_compare_u16_x64 (u16 v, u16 *a) mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)); x = i16x16_pack (v16 == av[2], v16 == av[3]); mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32; +#elif defined(CLIB_HAVE_VEC128) && defined(__ARM_NEON) + u16x8 idx8 = u16x8_splat (v); + u16x8 m = { 1, 2, 4, 8, 16, 32, 64, 128 }; + u16x8u *av = (u16x8u *) a; + + /* compare each u16 element with idx8, result gives 0xffff in each element + of the resulting vector if comparison result is true. + Bitwise AND with m will give us one bit set for true result and offset + of that bit represents the element index. Finally vaddvq_u16() gives us sum + of all elements of the vector which will give us u8 bitmap. 
*/ + + mask = ((u64) vaddvq_u16 ((av[0] == idx8) & m) | + (u64) vaddvq_u16 ((av[1] == idx8) & m) << 8 | + (u64) vaddvq_u16 ((av[2] == idx8) & m) << 16 | + (u64) vaddvq_u16 ((av[3] == idx8) & m) << 24 | + (u64) vaddvq_u16 ((av[4] == idx8) & m) << 32 | + (u64) vaddvq_u16 ((av[5] == idx8) & m) << 40 | + (u64) vaddvq_u16 ((av[6] == idx8) & m) << 48 | + (u64) vaddvq_u16 ((av[7] == idx8) & m) << 56); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u16x8 idx8 = u16x8_splat (v); u16x8u *av = (u16x8u *) a; -- cgit 1.2.3-korg