summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2021-12-02 13:02:38 +0100
committerOle Tr�an <otroan@employees.org>2021-12-02 13:46:23 +0000
commitefd6de87d3268f1ab499799cefb2e8b32a613f79 (patch)
tree670d14aa4316031eb84ba101529ebc4e662ec2b7 /src
parent9ab2e5d8d711e678e914e4fb8d3d181f868acf3d (diff)
vppinfra: vector shuffle cleanup
Type: refactor Change-Id: I8b3fc2ce30df313467274a174c5ac6adbf296153 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src')
-rw-r--r--src/examples/sample-plugin/sample/node.c35
-rw-r--r--src/vppinfra/vector.h62
-rw-r--r--src/vppinfra/vector_avx2.h6
-rw-r--r--src/vppinfra/vector_avx512.h6
-rw-r--r--src/vppinfra/vector_neon.h6
-rw-r--r--src/vppinfra/vector_sse42.h18
6 files changed, 80 insertions, 53 deletions
diff --git a/src/examples/sample-plugin/sample/node.c b/src/examples/sample-plugin/sample/node.c
index a31c3e86e08..a9d8b66d788 100644
--- a/src/examples/sample-plugin/sample/node.c
+++ b/src/examples/sample-plugin/sample/node.c
@@ -291,7 +291,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
sample_next_t next_index;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -345,8 +344,10 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst0 = ((u8x16 *) en0)[0];
src_dst1 = ((u8x16 *) en1)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
((u8x16 *) en1)[0] = src_dst1;
@@ -418,7 +419,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
en0 = vlib_buffer_get_current (b0);
src_dst0 = ((u8x16 *) en0)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
@@ -469,7 +471,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_3
-#define u8x16_shuffle __builtin_shuffle
/* This would normally be a stack local, but since it's a constant... */
static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };
@@ -479,7 +480,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
/* See comment below about sending all pkts to the same place... */
u16 *next __attribute__ ((unused));
@@ -518,10 +518,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
- src_dst2 = u8x16_shuffle (src_dst2, swapmac);
- src_dst3 = u8x16_shuffle (src_dst3, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst2 = u8x16_shuffle (src_dst2, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst3 = u8x16_shuffle (src_dst3, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
@@ -552,7 +556,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
vnet_buffer (b[0])->sw_if_index[VLIB_RX];
@@ -611,18 +616,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_4
-#define u8x16_shuffle __builtin_shuffle
-
-static u8x16 swapmac =
- { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
-
/* Final stage in the pipeline, do the mac swap */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12,
+ 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
vnet_buffer (b)->sw_if_index[VLIB_TX] =
vnet_buffer (b)->sw_if_index[VLIB_RX];
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h
index 88cf288cb26..49bc2976b3e 100644
--- a/src/vppinfra/vector.h
+++ b/src/vppinfra/vector.h
@@ -126,6 +126,68 @@ foreach_vec
#undef _vector_size
+ /* _shuffle and _shuffle2 */
+#if defined(__GNUC__) && !defined(__clang__)
+#define __builtin_shufflevector(v1, v2, ...) \
+ __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
+#endif
+
+#define u8x16_shuffle(v1, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
+#define u8x32_shuffle(v1, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
+#define u8x64_shuffle(v1, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)
+
+#define u16x8_shuffle(v1, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
+#define u16x16_shuffle(v1, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
+#define u16x32_shuffle(v1, ...) \
+ (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__);
+
+#define u32x4_shuffle(v1, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
+#define u32x8_shuffle(v1, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
+#define u32x16_shuffle(v1, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)
+
+#define u64x2_shuffle(v1, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
+#define u64x4_shuffle(v1, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
+#define u64x8_shuffle(v1, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)
+
+#define u8x16_shuffle2(v1, v2, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
+#define u8x32_shuffle2(v1, v2, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
+#define u8x64_shuffle2(v1, v2, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)
+
+#define u16x8_shuffle2(v1, v2, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
+#define u16x16_shuffle2(v1, v2, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
+#define u16x32_shuffle2(v1, v2, ...) \
+ (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__);
+
+#define u32x4_shuffle2(v1, v2, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
+#define u32x8_shuffle2(v1, v2, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
+#define u32x16_shuffle2(v1, v2, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)
+
+#define u64x2_shuffle2(v1, v2, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
+#define u64x4_shuffle2(v1, v2, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
+#define u64x8_shuffle2(v1, v2, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
+
#define VECTOR_WORD_TYPE(t) t##x
#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index 7226c230e68..59857182a93 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -183,12 +183,6 @@ u16x16_byte_swap (u16x16 v)
return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap);
}
-static_always_inline u8x32
-u8x32_shuffle (u8x32 v, u8x32 m)
-{
- return (u8x32) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) m);
-}
-
#define u8x32_align_right(a, b, imm) \
(u8x32) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, imm)
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index 8acac2a3a9f..33f40ef7b5a 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -196,12 +196,6 @@ u8x64_reflect_u8x16 (u8x64 x)
return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask);
}
-static_always_inline u8x64
-u8x64_shuffle (u8x64 v, u8x64 m)
-{
- return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m);
-}
-
#define u8x64_align_right(a, b, imm) \
(u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 80d7bda9f3c..e7b31259f73 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -129,12 +129,6 @@ u32x4_byte_swap (u32x4 v)
return (u32x4) vrev32q_u8 ((u8x16) v);
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) vqtbl1q_u8 (v, m);
-}
-
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index 7e75ad28710..35495d6658f 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -411,24 +411,6 @@ u32x4_sum_elts (u32x4 sum4)
return sum4[0];
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) m);
-}
-
-static_always_inline u32x4
-u32x4_shuffle (u32x4 v, const int a, const int b, const int c, const int d)
-{
-#if defined(__clang__) || !__OPTIMIZE__
- u32x4 r = { v[a], v[b], v[c], v[d] };
- return r;
-#else
- return (u32x4) _mm_shuffle_epi32 ((__m128i) v,
- a | b << 2 | c << 4 | d << 6);
-#endif
-}
-
/* _from_ */
/* *INDENT-OFF* */
#define _(f,t,i) \