summaryrefslogtreecommitdiffstats
path: root/src/vlib
diff options
context:
space:
mode:
authorSirshak Das <sirshak.das@arm.com>2018-08-22 08:46:52 +0800
committerDamjan Marion <dmarion@me.com>2018-09-12 10:09:50 +0000
commit759226e6687a2c20321aa842c60c7ec6ee5cb2ed (patch)
tree9bb0c40c5fa3afe28f1fe8a12aa6ad838698430e /src/vlib
parentdbecf18b8ad2ef1a35b73c6a0f8f5d4c471d195f (diff)
Add and enable u32x4_extend_to_u64x2_high for aarch64 NEON intrinsics.
This is the high-half variant of u32x4_extend_to_u64x2: it widens the upper two u32 lanes of the vector to u64. A single call accomplishes the same task as the shuffle followed by the extend used with SSE intrinsics. This enables the NEON version of the buffer-index to buffer-pointer translation. Change-Id: I52d7bbf3d76ba69c9acb0e518ff4bc6abf3bbbd4 Signed-off-by: Sirshak Das <sirshak.das@arm.com> Reviewed-by: Steve Capper <steve.capper@arm.com> Reviewed-by: Yi He <yi.he@arm.com> Verified-by: Lijian Zhang <lijian.zhang@arm.com>
Diffstat (limited to 'src/vlib')
-rw-r--r--src/vlib/buffer_funcs.h6
1 files changed, 5 insertions, 1 deletions
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 667063cd693..5306af6e218 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -97,12 +97,16 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count,
u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
/* shift and add to get vlib_buffer_t pointer */
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
-#elif defined (CLIB_HAVE_VEC128) && defined (__x86_64__)
+#elif defined (CLIB_HAVE_VEC128)
u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset);
u32x4 bi4 = u32x4_load_unaligned (bi);
u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+#if defined (__aarch64__)
+ u64x2 b1 = u32x4_extend_to_u64x2_high ((u32x4) bi4);
+#else
bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+#endif
u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
#else