summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector_neon.h
diff options
context:
space:
mode:
authorGabriel Ganne <gabriel.ganne@enea.com>2017-12-05 17:33:37 +0100
committerDamjan Marion <dmarion.lists@gmail.com>2018-02-08 20:32:31 +0000
commitb81831d122d59b55c7d42b39e4fd9e94b7d40aca (patch)
tree2427cad202283105a19387ab5f249f7be07acc97 /src/vppinfra/vector_neon.h
parentc43b3f986476ffb4506b7115898e809a6e34f601 (diff)
add CLIB_HAVE_VEC128 with NEON intrinsics (VPP-1127)
Enable CLIB_HAVE_VEC128 if both aarch64 and __ARM_NEON are defined, i.e. armv8 only, not armv7. Add more NEON compare intrinsics wrappers. I only add simple intrinsics wrappers. More complex ones can be added later as they are needed, with performance tests on the corresponding feature to back them up. Remove wrongly added 128-bit definitions that were defined on both armv7 and armv8 without concern for the presence of NEON instructions. Notable corresponding code activations: * MHEAP_FLAG_SMALL_OBJECT_CACHE in mheap.c * ip4 fib mtrie leaves access * enable ixge plugin compilation for aarch64 (conf still disables it by default) Change-Id: I99953823627bdff6f222d232c78aa7b655aaf77a Signed-off-by: Gabriel Ganne <gabriel.ganne@enea.com>
Diffstat (limited to 'src/vppinfra/vector_neon.h')
-rw-r--r--src/vppinfra/vector_neon.h60
1 files changed, 60 insertions, 0 deletions
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index cea5275949f..6357d68fabb 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -31,8 +31,68 @@
/* Saturating subtract.  The vqsubq_* intrinsics clamp the result to the
   lane's representable range; the plain vsubq_* forms wrap around instead,
   which contradicts the "saturate" contract these names advertise. */
#define u16x8_sub_saturate(a,b) vqsubq_u16(a,b)
#define i16x8_sub_saturate(a,b) vqsubq_s16(a,b)
+
+/* Compare operations.
+   Each wrapper maps to the NEON intrinsic whose lane width and signedness
+   match the wrapper's name; the result is a per-lane compare mask. */
+#define u8x16_is_equal(a,b) vceqq_u8(a,b)
+#define i8x16_is_equal(a,b) vceqq_s8(a,b)
#define u16x8_is_equal(a,b) vceqq_u16(a,b)
#define i16x8_is_equal(a,b) vceqq_s16(a,b)
+#define u32x4_is_equal(a,b) vceqq_u32(a,b)
+#define i32x4_is_equal(a,b) vceqq_s32(a,b)
+#define i8x16_is_greater(a,b) vcgtq_s8(a,b)
+#define i16x8_is_greater(a,b) vcgtq_s16(a,b)
+#define i32x4_is_greater(a,b) vcgtq_s32(a,b)
+
+/* Compare every byte lane of x against zero and return the resulting
+   u8x16_is_equal mask. */
+always_inline u8x16
+u8x16_is_zero (u8x16 x)
+{
+  return u8x16_is_equal (x, ((u8x16) { 0 }));
+}
+
+/* Compare every 16-bit lane of x against zero and return the resulting
+   u16x8_is_equal mask. */
+always_inline u16x8
+u16x8_is_zero (u16x8 x)
+{
+  return u16x8_is_equal (x, ((u16x8) { 0 }));
+}
+
+/* Compare every 32-bit lane of x against zero and return the resulting
+   u32x4_is_equal mask. */
+always_inline u32x4
+u32x4_is_zero (u32x4 x)
+{
+  return u32x4_is_equal (x, ((u32x4) { 0 }));
+}
+
+/* Collapse a 16-lane byte compare mask (each lane all ones or all zeros)
+   into a bitmap: bit i of the result is the top bit of byte lane i of x. */
+always_inline u32
+u8x16_compare_byte_mask (u8x16 x)
+{
+  /* Per-lane shift counts.  vshl_u8 shifts right for negative counts, so
+     the isolated top bit (0x80) of lane i ends up at bit position i. */
+  static int8_t const __attribute__ ((aligned (16))) lane_shift_tbl[8] =
+    { -7, -6, -5, -4, -3, -2, -1, 0 };
+  int fold;
+
+  uint8x8_t top_bit = vdup_n_u8 (0x80);
+  int8x8_t lane_shift = vld1_s8 (lane_shift_tbl);
+
+  /* Keep only the top bit of each lane, then move it to its bitmap slot. */
+  uint8x8_t lo_bits = vshl_u8 (vand_u8 (vget_low_u8 (x), top_bit),
+			       lane_shift);
+  uint8x8_t hi_bits = vshl_u8 (vand_u8 (vget_high_u8 (x), top_bit),
+			       lane_shift);
+
+  /* Three pairwise adds fold all eight lanes into lane 0.  Every lane
+     holds a distinct single bit, so the sum cannot carry. */
+  for (fold = 0; fold < 3; fold++)
+    {
+      lo_bits = vpadd_u8 (lo_bits, lo_bits);
+      hi_bits = vpadd_u8 (hi_bits, hi_bits);
+    }
+
+  /* High half supplies bits 8..15, low half bits 0..7. */
+  return ((hi_bits[0] << 8) | (lo_bits[0] & 0xff));
+}
always_inline u32
u16x8_zero_byte_mask (u16x8 input)