summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2017-05-09 17:45:50 +0200
committer Dave Barach <openvpp@barachs.net> 2017-05-10 14:03:08 +0000
commit bde55230fd34cca68ba47ad856b8a40067bdadc6 (patch)
tree c7755ad4429ce4e535098a4c4292c9f615675a73
parent db84e579ef77476e3c73780e20243ee1799530f3 (diff)
vppinfra: optimize and simplify splat inline functions
Example for the u32x4 case.

Old code generates a move followed by 2 unpack instructions:
  vmovd %edi,%xmm0
  vpunpckldq %xmm0,%xmm0,%xmm0
  vpunpcklqdq %xmm0,%xmm0,%xmm0

New code uses only one vector instruction (a broadcast), after storing the argument to the stack:
  mov %edi,-0x4(%rsp)
  vbroadcastss -0x4(%rsp),%xmm0

Change-Id: I989dc3025f5fe3f6c880972389ded11f31e2f3f6
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- src/vppinfra/vector_sse2.h 28
1 files changed, 11 insertions, 17 deletions
diff --git a/src/vppinfra/vector_sse2.h b/src/vppinfra/vector_sse2.h
index f782e8fd409..6830d5c6104 100644
--- a/src/vppinfra/vector_sse2.h
+++ b/src/vppinfra/vector_sse2.h
@@ -175,56 +175,50 @@ i32x2_pack (i32x2 lo, i32x2 hi)
always_inline u64x2
u64x2_splat (u64 a)
{
- u64x2 x = { a };
- x = u64x2_interleave_lo (x, x);
+ u64x2 x = { a, a };
return x;
}
always_inline u32x4
u32x4_splat (u32 a)
{
- u32x4 x = { a };
- x = u32x4_interleave_lo (x, x);
- x = (u32x4) u64x2_interleave_lo ((u64x2) x, (u64x2) x);
+ u32x4 x = { a, a, a, a };
return x;
}
always_inline u16x8
u16x8_splat (u16 a)
{
- u32 t = (u32) a | ((u32) a << 16);
- return (u16x8) u32x4_splat (t);
+ u16x8 x = { a, a, a, a, a, a, a, a };
+ return x;
}
always_inline u8x16
u8x16_splat (u8 a)
{
- u32 t = (u32) a | ((u32) a << 8);
- t |= t << 16;
- return (u8x16) u16x8_splat (t);
+ u8x16 x = { a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a };
+ return x;
}
always_inline u32x2
u32x2_splat (u32 a)
{
- u32x2 x = { a };
- x = u32x2_interleave_lo (x, x);
+ u32x2 x = { a, a };
return x;
}
always_inline u16x4
u16x4_splat (u16 a)
{
- u32 t = (u32) a | ((u32) a << 16);
- return (u16x4) u32x2_splat (t);
+ u16x4 x = { a, a, a, a };
+ return x;
}
always_inline u8x8
u8x8_splat (u8 a)
{
- u32 t = (u32) a | ((u32) a << 8);
- t |= t << 16;
- return (u8x8) u32x2_splat (t);
+ u8x8 x = { a, a, a, a, a, a, a, a };
+ return x;
}
#define i64x2_splat u64x2_splat