From e6a47cf5576c6e1dbae9e41365bbecfcf128603e Mon Sep 17 00:00:00 2001 From: "Lijian.Zhang" Date: Tue, 12 Mar 2019 18:32:39 +0800 Subject: Re-enable aarch64 neon instruction in vlib_buffer_free_inline uint8_t vaddvq_u8 (uint8x16_t __a) is not appropriate for implementing xxx_is_all_zero, because the 8-bit sum of the lanes can overflow and produce an incorrect return value. For example, with u8x16 x = {0, 1, 255, 0, 0}; the sum 1 + 255 wraps around to 0, so a non-zero vector is wrongly reported as all zero. Change-Id: Ia6a10bdf8da360dec12db902d028751a1a77e9a4 Signed-off-by: Lijian Zhang Reviewed-by: Sirshak Das --- src/vppinfra/vector_neon.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/vppinfra/vector_neon.h') diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h index aef83657321..90451602d2d 100644 --- a/src/vppinfra/vector_neon.h +++ b/src/vppinfra/vector_neon.h @@ -22,6 +22,13 @@ #define i16x8_sub_saturate(a,b) vsubq_s16(a,b) /* Dummy. Aid making uniform macros */ #define vreinterpretq_u8_u8(a) a +/* Implement the missing intrinsics to make uniform macros */ +#define vminvq_u64(x) \ +({ \ + u64 x0 = vgetq_lane_u64(x, 0); \ + u64 x1 = vgetq_lane_u64(x, 1); \ + x0 < x1 ? x0 : x1; \ +}) /* Converts all ones/zeros compare mask to bitmap. */ always_inline u32 @@ -62,11 +69,11 @@ t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \ \ static_always_inline int \ t##s##x##c##_is_all_zero (t##s##x##c x) \ -{ return !(vaddvq_##i (x)); } \ +{ return !!(vminvq_u##s (vceqq_##i (vdupq_n_##i(0), x))); } \ \ static_always_inline int \ t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \ -{ return t##s##x##c##_is_all_zero (a ^ b); } \ +{ return !!(vminvq_u##s (vceqq_##i (a, b))); } \ \ static_always_inline int \ t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \ -- cgit 1.2.3-korg