From bde55230fd34cca68ba47ad856b8a40067bdadc6 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 9 May 2017 17:45:50 +0200 Subject: vppinfra: optimize and simplify splat inline functions Example for u32x4 case: Old code generates 3 instructions: vmovd %edi,%xmm0 vpunpckldq %xmm0,%xmm0,%xmm0 vpunpcklqdq %xmm0,%xmm0,%xmm0 New code uses only 2 instructions: mov %edi,-0x4(%rsp) vbroadcastss -0x4(%rsp),%xmm0 Change-Id: I989dc3025f5fe3f6c880972389ded11f31e2f3f6 Signed-off-by: Damjan Marion --- src/vppinfra/vector_sse2.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'src/vppinfra/vector_sse2.h') diff --git a/src/vppinfra/vector_sse2.h b/src/vppinfra/vector_sse2.h index f782e8fd..6830d5c6 100644 --- a/src/vppinfra/vector_sse2.h +++ b/src/vppinfra/vector_sse2.h @@ -175,56 +175,50 @@ i32x2_pack (i32x2 lo, i32x2 hi) always_inline u64x2 u64x2_splat (u64 a) { - u64x2 x = { a }; - x = u64x2_interleave_lo (x, x); + u64x2 x = { a, a }; return x; } always_inline u32x4 u32x4_splat (u32 a) { - u32x4 x = { a }; - x = u32x4_interleave_lo (x, x); - x = (u32x4) u64x2_interleave_lo ((u64x2) x, (u64x2) x); + u32x4 x = { a, a, a, a }; return x; } always_inline u16x8 u16x8_splat (u16 a) { - u32 t = (u32) a | ((u32) a << 16); - return (u16x8) u32x4_splat (t); + u16x8 x = { a, a, a, a, a, a, a, a }; + return x; } always_inline u8x16 u8x16_splat (u8 a) { - u32 t = (u32) a | ((u32) a << 8); - t |= t << 16; - return (u8x16) u16x8_splat (t); + u8x16 x = { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a }; + return x; } always_inline u32x2 u32x2_splat (u32 a) { - u32x2 x = { a }; - x = u32x2_interleave_lo (x, x); + u32x2 x = { a, a }; return x; } always_inline u16x4 u16x4_splat (u16 a) { - u32 t = (u32) a | ((u32) a << 16); - return (u16x4) u32x2_splat (t); + u16x4 x = { a, a, a, a }; + return x; } always_inline u8x8 u8x8_splat (u8 a) { - u32 t = (u32) a | ((u32) a << 8); - t |= t << 16; - return (u8x8) u32x2_splat (t); + u8x8 x = { a, a, a, a, 
a, a, a, a }; + return x; } #define i64x2_splat u64x2_splat -- cgit 1.2.3-korg