summary | refs | log | tree | commit | diff | stats
path: root/src/vppinfra
diff options
context:
space:
mode:
author Lijian.Zhang <Lijian.Zhang@arm.com> 2019-03-12 18:32:39 +0800
committer Damjan Marion <dmarion@me.com> 2019-03-20 09:27:20 +0000
commit e6a47cf5576c6e1dbae9e41365bbecfcf128603e (patch)
tree f30b8c426d5830ffe3c85f76432878c7fdffa4ea /src/vppinfra
parent e225f71766a75364523e6892a709ca95a3e043c1 (diff)
Re-enable aarch64 neon instruction in vlib_buffer_free_inline
vaddvq_u8 (uint8x16_t __a) is not appropriate for implementing xxx_is_all_zero, because the sum across lanes can overflow and produce an incorrect return value. Here is an example: u8x16 x = {0 <repeats 12 times>, 1, 255, 0, 0}; the lanes sum to 256, which wraps to 0 in an 8-bit result, so this non-zero vector would be reported as all zero.

Change-Id: Ia6a10bdf8da360dec12db902d028751a1a77e9a4
Signed-off-by: Lijian Zhang <Lijian.Zhang@arm.com>
Reviewed-by: Sirshak Das <Sirshak.Das@arm.com>
Diffstat (limited to 'src/vppinfra')
-rw-r--r-- src/vppinfra/vector_neon.h 11
1 files changed, 9 insertions, 2 deletions
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index aef83657321..90451602d2d 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -22,6 +22,13 @@
#define i16x8_sub_saturate(a,b) vsubq_s16(a,b)
/* Dummy. Aid making uniform macros */
#define vreinterpretq_u8_u8(a) a
+/* vminvq_u64 is a missing intrinsic: emulate it (smaller of the two u64 lanes) to keep the macros uniform. Note: x is expanded twice. */
+#define vminvq_u64(x) \
+({ \
+ u64 x0 = vgetq_lane_u64(x, 0); \
+ u64 x1 = vgetq_lane_u64(x, 1); \
+ x0 < x1 ? x0 : x1; \
+})
/* Converts all ones/zeros compare mask to bitmap. */
always_inline u32
@@ -62,11 +69,11 @@ t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
\
static_always_inline int \
t##s##x##c##_is_all_zero (t##s##x##c x) \
-{ return !(vaddvq_##i (x)); } \
+{ return !!(vminvq_u##s (vceqq_##i (vdupq_n_##i(0), x))); } \
\
static_always_inline int \
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
-{ return t##s##x##c##_is_all_zero (a ^ b); } \
+{ return !!(vminvq_u##s (vceqq_##i (a, b))); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \