diff options
author | Damjan Marion <damarion@cisco.com> | 2018-02-20 12:34:40 +0100 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2018-02-20 21:11:06 +0000 |
commit | 6525c7f9022fbed15c519872833097eb8607118b (patch) | |
tree | c9b1632a16ae1f724d48b1a5bdb2aefab8634e9d | |
parent | ad099335c1c2de17fb0b3b29b008ef8623ee93c0 (diff) |
vppinfra: autogerate vector typedefs and basic inline functions
Change-Id: Ie9f611fa6a962b0937245f5cc949571ba11c5604
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- | src/vppinfra/vector.h | 222 | ||||
-rw-r--r-- | src/vppinfra/vector_altivec.h | 27 | ||||
-rw-r--r-- | src/vppinfra/vector_iwmmxt.h | 27 | ||||
-rw-r--r-- | src/vppinfra/vector_neon.h | 43 | ||||
-rw-r--r-- | src/vppinfra/vector_sse42.h | 143 |
5 files changed, 83 insertions, 379 deletions
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h index 13d7ba27031..3fdffc469bd 100644 --- a/src/vppinfra/vector.h +++ b/src/vppinfra/vector.h @@ -73,63 +73,50 @@ #define _vector_size(n) __attribute__ ((vector_size (n))) -/* Signed 64 bit. */ -typedef char i8x8 _vector_size (8); -typedef short i16x4 _vector_size (8); -typedef int i32x2 _vector_size (8); - -/* Unsigned 64 bit. */ -typedef unsigned char u8x8 _vector_size (8); -typedef unsigned short u16x4 _vector_size (8); -typedef unsigned int u32x2 _vector_size (8); - -/* Floating point 64 bit. */ -typedef float f32x2 _vector_size (8); - -/* Signed 128 bit. */ -typedef i8 i8x16 _vector_size (16); -typedef i16 i16x8 _vector_size (16); -typedef i32 i32x4 _vector_size (16); -typedef long long i64x2 _vector_size (16); - -/* Unsigned 128 bit. */ -typedef u8 u8x16 _vector_size (16); -typedef u16 u16x8 _vector_size (16); -typedef u32 u32x4 _vector_size (16); -typedef u64 u64x2 _vector_size (16); - -typedef f32 f32x4 _vector_size (16); -typedef f64 f64x2 _vector_size (16); - -/* Signed 256 bit. */ -typedef i8 i8x32 _vector_size (32); -typedef i16 i16x16 _vector_size (32); -typedef i32 i32x8 _vector_size (32); -typedef long long i64x4 _vector_size (32); - -/* Unsigned 256 bit. */ -typedef u8 u8x32 _vector_size (32); -typedef u16 u16x16 _vector_size (32); -typedef u32 u32x8 _vector_size (32); -typedef u64 u64x4 _vector_size (32); - -typedef f32 f32x8 _vector_size (32); -typedef f64 f64x4 _vector_size (32); - -/* Signed 512 bit. */ -typedef i8 i8x64 _vector_size (64); -typedef i16 i16x32 _vector_size (64); -typedef i32 i32x16 _vector_size (64); -typedef long long i64x8 _vector_size (64); - -/* Unsigned 512 bit. */ -typedef u8 u8x64 _vector_size (64); -typedef u16 u16x32 _vector_size (64); -typedef u32 u32x16 _vector_size (64); -typedef u64 u64x8 _vector_size (64); - -typedef f32 f32x16 _vector_size (64); -typedef f64 f64x8 _vector_size (64); +#define foreach_vec64i _(i,8,8) _(i,16,4) _(i,32,2) +#define foreach_vec64u _(u,8,8) _(u,16,4) _(u,32,2) +#define foreach_vec64f _(f,32,2) +#define foreach_vec128i _(i,8,16) _(i,16,8) _(i,32,4) _(i,64,2) +#define foreach_vec128u _(u,8,16) _(u,16,8) _(u,32,4) _(u,64,2) +#define foreach_vec128f _(f,32,4) _(f,64,2) +#define foreach_vec256i _(i,8,32) _(i,16,16) _(i,32,8) _(i,64,4) +#define foreach_vec256u _(u,8,32) _(u,16,16) _(u,32,8) _(u,64,4) +#define foreach_vec256f _(f,32,8) _(f,64,4) +#define foreach_vec512i _(i,8,64) _(i,16,32) _(i,32,16) _(i,64,8) +#define foreach_vec512u _(u,8,64) _(u,16,32) _(u,32,16) _(u,64,8) +#define foreach_vec512f _(f,32,16) _(f,64,8) + +#if defined (CLIB_HAVE_VEC512) +#define foreach_int_vec foreach_vec64i foreach_vec128i foreach_vec256i foreach_vec512i +#define foreach_uint_vec foreach_vec64u foreach_vec128u foreach_vec256u foreach_vec512u +#define foreach_float_vec foreach_vec64f foreach_vec128f foreach_vec256f foreach_vec512f +#elif defined (CLIB_HAVE_VEC256) +#define foreach_int_vec foreach_vec64i foreach_vec128i foreach_vec256i +#define foreach_uint_vec foreach_vec64u foreach_vec128u foreach_vec256u +#define foreach_float_vec foreach_vec64f foreach_vec128f foreach_vec256f +#else +#define foreach_int_vec foreach_vec64i foreach_vec128i +#define foreach_uint_vec foreach_vec64u foreach_vec128u +#define foreach_float_vec foreach_vec64f foreach_vec128f +#endif + +#define foreach_vec foreach_int_vec foreach_uint_vec foreach_float_vec + +/* *INDENT-OFF* */ + +/* Type Definitions */ +#define _(t,s,c) \ +typedef t##s t##s##x##c _vector_size (s/8*c); \ +typedef union { \ + t##s##x##c as_##t##s##x##c; \ + t##s as_##t##s[c]; \ +} t##s##x##c##_union_t; + + foreach_vec64i foreach_vec64u foreach_vec64f + foreach_vec128i foreach_vec128u foreach_vec128f + foreach_vec256i foreach_vec256u foreach_vec256f + foreach_vec512i foreach_vec512u foreach_vec512f +#undef _ /* Vector word sized types. */ #ifndef CLIB_VECTOR_WORD_BITS @@ -167,91 +154,47 @@ typedef u64 u64x _vector_size (8); #define VECTOR_WORD_TYPE(t) t##x #define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t)) -/* Union types. */ -#if (defined(CLIB_HAVE_VEC128) || defined(CLIB_HAVE_VEC64)) - -#define _(t) \ - typedef union { \ - t##x as_##t##x; \ - t as_##t[VECTOR_WORD_TYPE_LEN (t)]; \ - } t##x##_union_t; - -_(u8); -_(u16); -_(u32); -_(u64); -_(i8); -_(i16); -_(i32); -_(i64); - +/* this series of macros generate _is_equal, _is_greater, _is_zero, _add + and _sub inline funcitons for each vector type */ +#define _(t, s, c) \ + static_always_inline t##s##x##c \ +t##s##x##c##_is_equal (t##s##x##c v1, t##s##x##c v2) \ +{ return (v1 == v2); } \ + \ +static_always_inline t##s##x##c \ +t##s##x##c##_is_greater (t##s##x##c v1, t##s##x##c v2) \ +{ return (v1 > v2); } \ + \ +static_always_inline t##s##x##c \ +t##s##x##c##_is_zero (t##s##x##c v1) \ +{ t##s##x##c z = {0}; return (v1 == z); } \ + \ +static_always_inline t##s##x##c \ +t##s##x##c##_add (t##s##x##c v1, t##s##x##c v2) \ +{ return (v1 + v2); } \ + \ +static_always_inline t##s##x##c \ +t##s##x##c##_sub (t##s##x##c v1, t##s##x##c v2) \ +{ return (v1 - v2); } + foreach_vec #undef _ -#endif - -#ifdef CLIB_HAVE_VEC64 - -#define _(t,n) \ - typedef union { \ - t##x##n as_##t##x##n; \ - t as_##t[n]; \ - } t##x##n##_union_t; \ - -_(u8, 8); -_(u16, 4); -_(u32, 2); -_(i8, 8); -_(i16, 4); -_(i32, 2); - +/* this macro generate _splat inline funcitons for each scalar vector type */ +#define _(t, s, c) \ + static_always_inline t##s##x##c \ +t##s##x##c##_splat (t##s x) \ +{ \ + t##s##x##c r; \ + int i; \ + \ + for (i = 0; i < c; i++) \ + r[i] = x; \ + \ + return r; \ +} + foreach_int_vec foreach_uint_vec #undef _ -#endif - -#ifdef CLIB_HAVE_VEC128 - -#define _(t,n) \ - typedef union { \ - t##x##n as_##t##x##n; \ - t as_##t[n]; \ - } t##x##n##_union_t; \ - -_(u8, 16); -_(u16, 8); -_(u32, 4); -_(u64, 2); -_(i8, 16); -_(i16, 8); -_(i32, 4); -_(i64, 2); -_(f32, 4); -_(f64, 2); - -#undef _ - -#endif - -/* When we don't have vector types, still define e.g. u32x4_union_t but as an array. */ -#if !defined(CLIB_HAVE_VEC128) && !defined(CLIB_HAVE_VEC64) - -#define _(t,n) \ - typedef union { \ - t as_##t[n]; \ - } t##x##n##_union_t; \ - -_(u8, 16); -_(u16, 8); -_(u32, 4); -_(u64, 2); -_(i8, 16); -_(i16, 8); -_(i32, 4); -_(i64, 2); - -#undef _ - -#endif - #if defined (__SSE4_2__) && __GNUC__ >= 4 #include <vppinfra/vector_sse42.h> #endif @@ -272,8 +215,9 @@ _(i64, 2); #include <vppinfra/vector_funcs.h> #endif -#endif /* included_clib_vector_h */ +/* *INDENT-ON* */ +#endif /* included_clib_vector_h */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/vector_altivec.h b/src/vppinfra/vector_altivec.h index 0e9de820ed8..c9f7facca30 100644 --- a/src/vppinfra/vector_altivec.h +++ b/src/vppinfra/vector_altivec.h @@ -38,26 +38,6 @@ #ifndef included_vector_altivec_h #define included_vector_altivec_h -/* Splats. */ -#define _(t,n,ti,fi,tr,fr) \ - always_inline t##x##n t##x##n##_splat (t v) \ - { return (t##x##n) __builtin_altivec_##fi ((ti) v); } \ - \ - always_inline t##x##n t##x##n##_splat_word (t##x##n x, int word_index) \ - { return (t##x##n) __builtin_altivec_##fr ((tr) x, word_index); } - -#define u16x8_splat(i) ((u16x8) __builtin_altivec_vspltish (i)) -#define i16x8_splat(i) ((i16x8) __builtin_altivec_vspltish (i)) -#define u32x4_splat(i) ((u32x4) __builtin_altivec_vspltisw (i)) -#define i32x4_splat(i) ((i32x4) __builtin_altivec_vspltisw (i)) - -#define u16x8_splat_word(x,i) ((u16x8) __builtin_altivec_vsplth ((i16x8) (x), (i))) -#define i16x8_splat_word(x,i) ((i16x8) __builtin_altivec_vsplth ((i16x8) (x), (i))) -#define u32x4_splat_word(x,i) ((u32x4) __builtin_altivec_vspltw ((i32x4) (x), (i))) -#define i32x4_splat_word(x,i) ((i32x4) __builtin_altivec_vspltw ((i32x4) (x), (i))) - -#undef _ - /* 128 bit shifts. */ #define _(t,ti,lr,f) \ always_inline t t##_##lr (t x, t y) \ @@ -149,13 +129,6 @@ _signed_binop (32, 4, is_equal, vcmpequw) return u16x8_is_equal (x, zero); } -always_inline u32x4 -u32x4_is_zero (u32x4 x) -{ - u32x4 zero = { 0 }; - return u32x4_is_equal (x, zero); -} - always_inline u32 u32x4_zero_byte_mask (u32x4 x) { diff --git a/src/vppinfra/vector_iwmmxt.h b/src/vppinfra/vector_iwmmxt.h index 8e662045655..6fecb1ab71b 100644 --- a/src/vppinfra/vector_iwmmxt.h +++ b/src/vppinfra/vector_iwmmxt.h @@ -77,33 +77,6 @@ u32x2_interleave_lo (u32x2 a, u32x2 b) return __builtin_arm_wunpckilw (a, b); } -always_inline u32x2 -u32x2_splat (u32 a) -{ - u32x2 x = { a }; - x = u32x2_interleave_lo (x, x); - return x; -} - -always_inline u16x4 -u16x4_splat (u16 a) -{ - u32 t = (u32) a | ((u32) a << 16); - return u32x2_splat (t); -} - -always_inline u8x8 -u8x8_splat (u8 a) -{ - u32 t = (u32) a | ((u32) a << 8); - t |= t << 16; - return u32x2_splat (t); -} - -#define i32x2_splat u32x2_splat -#define i16x4_splat u16x4_splat -#define i8x8_splat u8x8_splat - /* 64 bit shifts. */ /* As of July 2008 the __builtin_arm shifts cause gcc-4.3.1 to crash diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h index 6357d68fabb..6a3e64e946d 100644 --- a/src/vppinfra/vector_neon.h +++ b/src/vppinfra/vector_neon.h @@ -17,53 +17,10 @@ #define included_vector_neon_h #include <arm_neon.h> -/* Splats. */ - -#define u8x16_splat(i) vdupq_n_u8(i) -#define u16x8_splat(i) vdupq_n_u16(i) -#define i16x8_splat(i) vdupq_n_s16(i) -#define u32x4_splat(i) vdupq_n_u32(i) -#define i32x4_splat(i) vdupq_n_s32(i) - /* Arithmetic */ -#define u16x8_add(a,b) vaddq_u16(a,b) -#define i16x8_add(a,b) vaddq_s16(a,b) #define u16x8_sub_saturate(a,b) vsubq_u16(a,b) #define i16x8_sub_saturate(a,b) vsubq_s16(a,b) - -/* Compare operations. */ -#define u8x16_is_equal(a,b) vceqq_u8(a,b) -#define i8x16_is_equal(a,b) vceqq_s8(a,b) -#define u16x8_is_equal(a,b) vceqq_u16(a,b) -#define i16x8_is_equal(a,b) vceqq_i16(a,b) -#define u32x4_is_equal(a,b) vceqq_u32(a,b) -#define i32x4_is_equal(a,b) vceqq_s32(a,b) -#define i8x16_is_greater(a,b) vcgtq_s8(a,b) -#define i16x8_is_greater(a,b) vcgtq_u8(a,b) -#define i32x4_is_greater(a,b) vcgtq_s32(a,b) - -always_inline u8x16 -u8x16_is_zero (u8x16 x) -{ - u8x16 zero = { 0 }; - return u8x16_is_equal (x, zero); -} - -always_inline u16x8 -u16x8_is_zero (u16x8 x) -{ - u16x8 zero = { 0 }; - return u16x8_is_equal (x, zero); -} - -always_inline u32x4 -u32x4_is_zero (u32x4 x) -{ - u32x4 zero = { 0 }; - return u32x4_is_equal (x, zero); -} - /* Converts all ones/zeros compare mask to bitmap. */ always_inline u32 u8x16_compare_byte_mask (u8x16 x) diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h index ce1bb81c93f..dab22deff7c 100644 --- a/src/vppinfra/vector_sse42.h +++ b/src/vppinfra/vector_sse42.h @@ -171,64 +171,6 @@ i32x2_pack (i32x2 lo, i32x2 hi) return (i16x4) _m_packssdw ((__m64) lo, (__m64) hi); } -/* Splats: replicate scalar value into vector. */ -always_inline u64x2 -u64x2_splat (u64 a) -{ - u64x2 x = { a, a }; - return x; -} - -always_inline u32x4 -u32x4_splat (u32 a) -{ - u32x4 x = { a, a, a, a }; - return x; -} - -always_inline u16x8 -u16x8_splat (u16 a) -{ - u16x8 x = { a, a, a, a, a, a, a, a }; - return x; -} - -always_inline u8x16 -u8x16_splat (u8 a) -{ - u8x16 x = { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a }; - return x; -} - -always_inline u32x2 -u32x2_splat (u32 a) -{ - u32x2 x = { a, a }; - return x; -} - -always_inline u16x4 -u16x4_splat (u16 a) -{ - u16x4 x = { a, a, a, a }; - return x; -} - -always_inline u8x8 -u8x8_splat (u8 a) -{ - u8x8 x = { a, a, a, a, a, a, a, a }; - return x; -} - -#define i64x2_splat u64x2_splat -#define i32x4_splat u32x4_splat -#define i16x8_splat u16x8_splat -#define i8x16_splat u8x16_splat -#define i32x2_splat u32x2_splat -#define i16x4_splat u16x4_splat -#define i8x8_splat u8x8_splat - #ifndef __ICC always_inline u64x2 u64x2_read_lo (u64x2 x, u64 * a) @@ -275,14 +217,6 @@ _(u8x16) _(u16x8) _(u32x4) _(u64x2) _(i8x16) _(i16x8) _(i32x4) _(i64x2) always_inline i##n##x##m \ i##n##x##m##_##f (i##n##x##m x, i##n##x##m y) \ { return (i##n##x##m) _mm_##g##n ((__m128i) x, (__m128i) y); } -/* Addition/subtraction. */ - _signed_binop (8, 16, add, add_epi) -_signed_binop (16, 8, add, add_epi) -_signed_binop (32, 4, add, add_epi) -_signed_binop (64, 2, add, add_epi) -_signed_binop (8, 16, sub, sub_epi) -_signed_binop (16, 8, sub, sub_epi) -_signed_binop (32, 4, sub, sub_epi) _signed_binop (64, 2, sub, sub_epi) /* Addition/subtraction with saturation. */ _signed_binop (8, 16, add_saturate, adds_epu) _signed_binop (16, 8, add_saturate, adds_epu) @@ -469,83 +403,6 @@ _(u64, 2, right, left); #undef _ #endif -/* Compare operations. */ -always_inline u8x16 -u8x16_is_equal (u8x16 x, u8x16 y) -{ - return (u8x16) _mm_cmpeq_epi8 ((__m128i) x, (__m128i) y); -} - -always_inline i8x16 -i8x16_is_equal (i8x16 x, i8x16 y) -{ - return (i8x16) _mm_cmpeq_epi8 ((__m128i) x, (__m128i) y); -} - -always_inline u16x8 -u16x8_is_equal (u16x8 x, u16x8 y) -{ - return (u16x8) _mm_cmpeq_epi16 ((__m128i) x, (__m128i) y); -} - -always_inline i16x8 -i16x8_is_equal (i16x8 x, i16x8 y) -{ - return (i16x8) _mm_cmpeq_epi16 ((__m128i) x, (__m128i) y); -} - -always_inline u32x4 -u32x4_is_equal (u32x4 x, u32x4 y) -{ - return (u32x4) _mm_cmpeq_epi32 ((__m128i) x, (__m128i) y); -} - -always_inline i32x4 -i32x4_is_equal (i32x4 x, i32x4 y) -{ - return (i32x4) _mm_cmpeq_epi32 ((__m128i) x, (__m128i) y); -} - -always_inline u8x16 -i8x16_is_greater (i8x16 x, i8x16 y) -{ - return (u8x16) _mm_cmpgt_epi8 ((__m128i) x, (__m128i) y); -} - -always_inline u16x8 -i16x8_is_greater (i16x8 x, i16x8 y) -{ - return (u16x8) _mm_cmpgt_epi16 ((__m128i) x, (__m128i) y); -} - -always_inline u32x4 -i32x4_is_greater (i32x4 x, i32x4 y) -{ - return (u32x4) _mm_cmpgt_epi32 ((__m128i) x, (__m128i) y); -} - -always_inline u8x16 -u8x16_is_zero (u8x16 x) -{ - u8x16 zero = { 0 }; - return u8x16_is_equal (x, zero); -} - -always_inline u16x8 -u16x8_is_zero (u16x8 x) -{ - u16x8 zero = { 0 }; - return u16x8_is_equal (x, zero); -} - -always_inline u32x4 -u32x4_is_zero (u32x4 x) -{ - u32x4 zero = { 0 }; - return u32x4_is_equal (x, zero); -} - - always_inline int u8x16_is_all_zero (u8x16 x) { |