diff options
author | Damjan Marion <damarion@cisco.com> | 2017-05-09 17:45:50 +0200 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2017-05-10 14:03:08 +0000 |
commit | bde55230fd34cca68ba47ad856b8a40067bdadc6 (patch) | |
tree | c7755ad4429ce4e535098a4c4292c9f615675a73 /src/vppinfra/vector_sse2.h | |
parent | db84e579ef77476e3c73780e20243ee1799530f3 (diff) |
vppinfra: optimize and simplify splat inline functions
Example for u32x4 case:
Old code generates 3 instructions (a move followed by 2 unpacks):
vmovd %edi,%xmm0
vpunpckldq %xmm0,%xmm0,%xmm0
vpunpcklqdq %xmm0,%xmm0,%xmm0
New code needs only a store and a single broadcast instruction:
mov %edi,-0x4(%rsp)
vbroadcastss -0x4(%rsp),%xmm0
Change-Id: I989dc3025f5fe3f6c880972389ded11f31e2f3f6
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/vector_sse2.h')
-rw-r--r-- | src/vppinfra/vector_sse2.h | 28 |
1 files changed, 11 insertions, 17 deletions
diff --git a/src/vppinfra/vector_sse2.h b/src/vppinfra/vector_sse2.h
index f782e8fd409..6830d5c6104 100644
--- a/src/vppinfra/vector_sse2.h
+++ b/src/vppinfra/vector_sse2.h
@@ -175,56 +175,50 @@ i32x2_pack (i32x2 lo, i32x2 hi)
 always_inline u64x2
 u64x2_splat (u64 a)
 {
-  u64x2 x = { a };
-  x = u64x2_interleave_lo (x, x);
+  u64x2 x = { a, a };
   return x;
 }
 
 always_inline u32x4
 u32x4_splat (u32 a)
 {
-  u32x4 x = { a };
-  x = u32x4_interleave_lo (x, x);
-  x = (u32x4) u64x2_interleave_lo ((u64x2) x, (u64x2) x);
+  u32x4 x = { a, a, a, a };
   return x;
 }
 
 always_inline u16x8
 u16x8_splat (u16 a)
 {
-  u32 t = (u32) a | ((u32) a << 16);
-  return (u16x8) u32x4_splat (t);
+  u16x8 x = { a, a, a, a, a, a, a, a };
+  return x;
 }
 
 always_inline u8x16
 u8x16_splat (u8 a)
 {
-  u32 t = (u32) a | ((u32) a << 8);
-  t |= t << 16;
-  return (u8x16) u16x8_splat (t);
+  u8x16 x = { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a };
+  return x;
 }
 
 always_inline u32x2
 u32x2_splat (u32 a)
 {
-  u32x2 x = { a };
-  x = u32x2_interleave_lo (x, x);
+  u32x2 x = { a, a };
   return x;
 }
 
 always_inline u16x4
 u16x4_splat (u16 a)
 {
-  u32 t = (u32) a | ((u32) a << 16);
-  return (u16x4) u32x2_splat (t);
+  u16x4 x = { a, a, a, a };
+  return x;
 }
 
 always_inline u8x8
 u8x8_splat (u8 a)
 {
-  u32 t = (u32) a | ((u32) a << 8);
-  t |= t << 16;
-  return (u8x8) u32x2_splat (t);
+  u8x8 x = { a, a, a, a, a, a, a, a };
+  return x;
 }
 
 #define i64x2_splat u64x2_splat