diff options
author | Damjan Marion <damarion@cisco.com> | 2021-05-05 19:31:41 +0200 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2021-05-05 19:57:04 +0200 |
commit | 7d14aad6379ebf96b75dd076260a2fccb7caa3b4 (patch) | |
tree | d9d4618182aa3f0da578a583c459eb93f95a7c80 /src/vppinfra/vector_funcs.h | |
parent | a7cea39b335efbad736addf926a9fb2c8c64a460 (diff) |
vppinfra: fix x86 packs / packus wrappers
They both take signed values as input.
Type: fix
Change-Id: If3d8ec4e0b1c02d7d65262bdd9db49ff7fbfef39
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/vector_funcs.h')
-rw-r--r-- | src/vppinfra/vector_funcs.h | 35 |
1 files changed, 17 insertions, 18 deletions
diff --git a/src/vppinfra/vector_funcs.h b/src/vppinfra/vector_funcs.h index 2b02d9eb301..5c446a5d50d 100644 --- a/src/vppinfra/vector_funcs.h +++ b/src/vppinfra/vector_funcs.h @@ -27,37 +27,36 @@ clib_compare_u16_x64 (u16 v, u16 *a) u16x16u *av = (u16x16u *) a; i8x32 x; - x = i16x16_pack (v16 == av[0], v16 == av[1]); + x = i8x32_pack (v16 == av[0], v16 == av[1]); mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)); - x = i16x16_pack (v16 == av[2], v16 == av[3]); + x = i8x32_pack (v16 == av[2], v16 == av[3]); mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32; #elif defined(CLIB_HAVE_VEC128) && defined(__ARM_NEON) - u16x8 idx8 = u16x8_splat (v); + u16x8 v8 = u16x8_splat (v); u16x8 m = { 1, 2, 4, 8, 16, 32, 64, 128 }; u16x8u *av = (u16x8u *) a; - /* compare each u16 elemment with idx8, result gives 0xffff in each element + /* compare each u16 elemment with v8, result gives 0xffff in each element of the resulting vector if comparison result is true. Bitwise AND with m will give us one bit set for true result and offset of that bit represend element index. Finally vaddvq_u16() gives us sum of all elements of the vector which will give us u8 bitmap. 
*/ - mask = ((u64) vaddvq_u16 ((av[0] == idx8) & m) | - (u64) vaddvq_u16 ((av[1] == idx8) & m) << 8 | - (u64) vaddvq_u16 ((av[2] == idx8) & m) << 16 | - (u64) vaddvq_u16 ((av[3] == idx8) & m) << 24 | - (u64) vaddvq_u16 ((av[4] == idx8) & m) << 32 | - (u64) vaddvq_u16 ((av[5] == idx8) & m) << 40 | - (u64) vaddvq_u16 ((av[6] == idx8) & m) << 48 | - (u64) vaddvq_u16 ((av[7] == idx8) & m) << 56); + mask = ((u64) vaddvq_u16 ((av[0] == v8) & m) | + (u64) vaddvq_u16 ((av[1] == v8) & m) << 8 | + (u64) vaddvq_u16 ((av[2] == v8) & m) << 16 | + (u64) vaddvq_u16 ((av[3] == v8) & m) << 24 | + (u64) vaddvq_u16 ((av[4] == v8) & m) << 32 | + (u64) vaddvq_u16 ((av[5] == v8) & m) << 40 | + (u64) vaddvq_u16 ((av[6] == v8) & m) << 48 | + (u64) vaddvq_u16 ((av[7] == v8) & m) << 56); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) - u16x8 idx8 = u16x8_splat (v); + u16x8 v8 = u16x8_splat (v); u16x8u *av = (u16x8u *) a; - mask = - ((u64) i8x16_msb_mask (i16x8_pack (idx8 == av[0], idx8 == av[1])) | - (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[2], idx8 == av[3])) << 16 | - (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[4], idx8 == av[5])) << 32 | - (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[6], idx8 == av[7])) << 48); + mask = ((u64) i8x16_msb_mask (i8x16_pack (v8 == av[0], v8 == av[1])) | + (u64) i8x16_msb_mask (i8x16_pack (v8 == av[2], v8 == av[3])) << 16 | + (u64) i8x16_msb_mask (i8x16_pack (v8 == av[4], v8 == av[5])) << 32 | + (u64) i8x16_msb_mask (i8x16_pack (v8 == av[6], v8 == av[7])) << 48); #else for (int i = 0; i < 64; i++) if (a[i] == v) |