summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector_funcs.h
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2021-05-05 19:31:41 +0200
committerDamjan Marion <damarion@cisco.com>2021-05-05 19:57:04 +0200
commit7d14aad6379ebf96b75dd076260a2fccb7caa3b4 (patch)
treed9d4618182aa3f0da578a583c459eb93f95a7c80 /src/vppinfra/vector_funcs.h
parenta7cea39b335efbad736addf926a9fb2c8c64a460 (diff)
vppinfra: fix x86 packs / packus wrappers
They both take signed values as input. Type: fix Change-Id: If3d8ec4e0b1c02d7d65262bdd9db49ff7fbfef39 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/vector_funcs.h')
-rw-r--r--src/vppinfra/vector_funcs.h35
1 files changed, 17 insertions, 18 deletions
diff --git a/src/vppinfra/vector_funcs.h b/src/vppinfra/vector_funcs.h
index 2b02d9eb301..5c446a5d50d 100644
--- a/src/vppinfra/vector_funcs.h
+++ b/src/vppinfra/vector_funcs.h
@@ -27,37 +27,36 @@ clib_compare_u16_x64 (u16 v, u16 *a)
u16x16u *av = (u16x16u *) a;
i8x32 x;
- x = i16x16_pack (v16 == av[0], v16 == av[1]);
+ x = i8x32_pack (v16 == av[0], v16 == av[1]);
mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3));
- x = i16x16_pack (v16 == av[2], v16 == av[3]);
+ x = i8x32_pack (v16 == av[2], v16 == av[3]);
mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32;
#elif defined(CLIB_HAVE_VEC128) && defined(__ARM_NEON)
- u16x8 idx8 = u16x8_splat (v);
+ u16x8 v8 = u16x8_splat (v);
u16x8 m = { 1, 2, 4, 8, 16, 32, 64, 128 };
u16x8u *av = (u16x8u *) a;
- /* compare each u16 elemment with idx8, result gives 0xffff in each element
+ /* compare each u16 element with v8, result gives 0xffff in each element
of the resulting vector if comparison result is true.
Bitwise AND with m will give us one bit set for true result and offset
of that bit represents element index. Finally vaddvq_u16() gives us sum
of all elements of the vector which will give us u8 bitmap. */
- mask = ((u64) vaddvq_u16 ((av[0] == idx8) & m) |
- (u64) vaddvq_u16 ((av[1] == idx8) & m) << 8 |
- (u64) vaddvq_u16 ((av[2] == idx8) & m) << 16 |
- (u64) vaddvq_u16 ((av[3] == idx8) & m) << 24 |
- (u64) vaddvq_u16 ((av[4] == idx8) & m) << 32 |
- (u64) vaddvq_u16 ((av[5] == idx8) & m) << 40 |
- (u64) vaddvq_u16 ((av[6] == idx8) & m) << 48 |
- (u64) vaddvq_u16 ((av[7] == idx8) & m) << 56);
+ mask = ((u64) vaddvq_u16 ((av[0] == v8) & m) |
+ (u64) vaddvq_u16 ((av[1] == v8) & m) << 8 |
+ (u64) vaddvq_u16 ((av[2] == v8) & m) << 16 |
+ (u64) vaddvq_u16 ((av[3] == v8) & m) << 24 |
+ (u64) vaddvq_u16 ((av[4] == v8) & m) << 32 |
+ (u64) vaddvq_u16 ((av[5] == v8) & m) << 40 |
+ (u64) vaddvq_u16 ((av[6] == v8) & m) << 48 |
+ (u64) vaddvq_u16 ((av[7] == v8) & m) << 56);
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u16x8 idx8 = u16x8_splat (v);
+ u16x8 v8 = u16x8_splat (v);
u16x8u *av = (u16x8u *) a;
- mask =
- ((u64) i8x16_msb_mask (i16x8_pack (idx8 == av[0], idx8 == av[1])) |
- (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[2], idx8 == av[3])) << 16 |
- (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[4], idx8 == av[5])) << 32 |
- (u64) i8x16_msb_mask (i16x8_pack (idx8 == av[6], idx8 == av[7])) << 48);
+ mask = ((u64) i8x16_msb_mask (i8x16_pack (v8 == av[0], v8 == av[1])) |
+ (u64) i8x16_msb_mask (i8x16_pack (v8 == av[2], v8 == av[3])) << 16 |
+ (u64) i8x16_msb_mask (i8x16_pack (v8 == av[4], v8 == av[5])) << 32 |
+ (u64) i8x16_msb_mask (i8x16_pack (v8 == av[6], v8 == av[7])) << 48);
#else
for (int i = 0; i < 64; i++)
if (a[i] == v)