summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2017-07-05 18:15:08 +0200
committerDamjan Marion <damarion@cisco.com>2017-07-12 21:20:11 +0200
commit331f66a5b4efcc800e0bbaf23d6f158d54fb00d1 (patch)
treed0e2e815f0058a63edc4c294adbca6ee192dc22d
parent02989064e4c26a4940a5292ba6c47023e6dd3131 (diff)
vppinfra: revert clib_memcpy optimization
Looks like some compiler versions are producing wrong code when we are copying 9-16 bytes so reverting back to the original code. Change-Id: I74b5fa54a3b01f6288648f1cb0926030edd3b26f Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--src/vppinfra/memcpy_avx.h13
-rw-r--r--src/vppinfra/memcpy_sse3.h11
2 files changed, 14 insertions, 10 deletions
diff --git a/src/vppinfra/memcpy_avx.h b/src/vppinfra/memcpy_avx.h
index e3feb76b6b7..e987d044b58 100644
--- a/src/vppinfra/memcpy_avx.h
+++ b/src/vppinfra/memcpy_avx.h
@@ -51,19 +51,22 @@
#include <stdint.h>
#include <x86intrin.h>
-typedef u8 u8x16u __attribute__ ((vector_size (16), aligned (1)));
-typedef u8 u8x32u __attribute__ ((vector_size (32), aligned (1)));
-
static inline void
clib_mov16 (u8 * dst, const u8 * src)
{
- *(u8x16u *) dst = *(u8x16u *) src;
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128 ((const __m128i *) src);
+ _mm_storeu_si128 ((__m128i *) dst, xmm0);
}
static inline void
clib_mov32 (u8 * dst, const u8 * src)
{
- *(u8x32u *) dst = *(u8x32u *) src;
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
+ _mm256_storeu_si256 ((__m256i *) dst, ymm0);
}
static inline void
diff --git a/src/vppinfra/memcpy_sse3.h b/src/vppinfra/memcpy_sse3.h
index 4fc48c86c8b..f61396c8922 100644
--- a/src/vppinfra/memcpy_sse3.h
+++ b/src/vppinfra/memcpy_sse3.h
@@ -51,19 +51,20 @@
#include <stdint.h>
#include <x86intrin.h>
-typedef u8 u8x16u __attribute__ ((vector_size (16), aligned (1)));
-typedef u8 u8x32u __attribute__ ((vector_size (32), aligned (1)));
-
static inline void
clib_mov16 (u8 * dst, const u8 * src)
{
- *(u8x16u *) dst = *(u8x16u *) src;
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128 ((const __m128i *) src);
+ _mm_storeu_si128 ((__m128i *) dst, xmm0);
}
static inline void
clib_mov32 (u8 * dst, const u8 * src)
{
- *(u8x32u *) dst = *(u8x32u *) src;
+ clib_mov16 ((u8 *) dst + 0 * 16, (const u8 *) src + 0 * 16);
+ clib_mov16 ((u8 *) dst + 1 * 16, (const u8 *) src + 1 * 16);
}
static inline void