diff options
author | Lijian.Zhang <Lijian.Zhang@arm.com> | 2019-03-12 18:32:39 +0800 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2019-03-20 09:27:20 +0000 |
commit | e6a47cf5576c6e1dbae9e41365bbecfcf128603e (patch) | |
tree | f30b8c426d5830ffe3c85f76432878c7fdffa4ea | |
parent | e225f71766a75364523e6892a709ca95a3e043c1 (diff) |
Re-enable aarch64 neon instruction in vlib_buffer_free_inline
int vaddvq_u8 (uint8x16_t __a) is not appropriate for implementing xxx_is_all_zero,
because the sum across lanes may overflow, causing an incorrect return value.
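Here's an example: the lanes of the following vector sum to 256, which wraps to 0 in 8 bits, so the non-zero vector is wrongly reported as all zero.
u8x16 x = {0 <repeats 12 times>, 1, 255, 0, 0};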
Change-Id: Ia6a10bdf8da360dec12db902d028751a1a77e9a4
Signed-off-by: Lijian Zhang <Lijian.Zhang@arm.com>
Reviewed-by: Sirshak Das <Sirshak.Das@arm.com>
-rw-r--r-- | src/vlib/buffer_funcs.h | 6 | ||||
-rw-r--r-- | src/vppinfra/vector_neon.h | 11 |
2 files changed, 12 insertions, 5 deletions
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index f2ac0bd92dd..e7fd6126d47 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -712,7 +712,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u8 buffer_pool_index = ~0; u32 n_queue = 0, queue[queue_size + 4]; vlib_buffer_t bt = { }; -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 }; vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 }; vlib_buffer_t flags_refs_mask = { @@ -737,7 +737,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, vlib_prefetch_buffer_header (b[6], LOAD); vlib_prefetch_buffer_header (b[7], LOAD); -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) u8x16 p0, p1, p2, p3, r; p0 = u8x16_load_unaligned (b[0]); p1 = u8x16_load_unaligned (b[1]); @@ -815,7 +815,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, } buffer_pool_index = b[0]->buffer_pool_index; -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) bpi_vec.buffer_pool_index = buffer_pool_index; #endif bp = vlib_get_buffer_pool (vm, buffer_pool_index); diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h index aef83657321..90451602d2d 100644 --- a/src/vppinfra/vector_neon.h +++ b/src/vppinfra/vector_neon.h @@ -22,6 +22,13 @@ #define i16x8_sub_saturate(a,b) vsubq_s16(a,b) /* Dummy. Aid making uniform macros */ #define vreinterpretq_u8_u8(a) a +/* Implement the missing intrinsics to make uniform macros */ +#define vminvq_u64(x) \ +({ \ + u64 x0 = vgetq_lane_u64(x, 0); \ + u64 x1 = vgetq_lane_u64(x, 1); \ + x0 < x1 ? x0 : x1; \ +}) /* Converts all ones/zeros compare mask to bitmap. 
*/ always_inline u32 @@ -62,11 +69,11 @@ t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \ \ static_always_inline int \ t##s##x##c##_is_all_zero (t##s##x##c x) \ -{ return !(vaddvq_##i (x)); } \ +{ return !!(vminvq_u##s (vceqq_##i (vdupq_n_##i(0), x))); } \ \ static_always_inline int \ t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \ -{ return t##s##x##c##_is_all_zero (a ^ b); } \ +{ return !!(vminvq_u##s (vceqq_##i (a, b))); } \ \ static_always_inline int \ t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \ |