author    Damjan Marion <damarion@cisco.com>  2017-12-14 09:30:11 +0100
committer Dave Barach <openvpp@barachs.net>  2017-12-14 13:37:30 +0000
commit    fad3fb362cc39fdfcb83e2f8a04e5a7b5a4403ca (patch)
tree      5cf092bf2cff466983ddadfa1e516e3a957f5999 /src/vppinfra/memcpy_sse3.h
parent    73e7f427e8865b0af71740c5ecfa55c7ee78dbd1 (diff)
vppinfra: add AVX512 variant of clib_memcpy
Taken from DPDK; the AVX2 variant is also updated to stay in sync with the DPDK version.

Change-Id: I8a42e4141a5a1a8cfbee328b07bd0c9b38a9eb05
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/memcpy_sse3.h')
-rw-r--r--  src/vppinfra/memcpy_sse3.h | 62
1 file changed, 33 insertions(+), 29 deletions(-)
diff --git a/src/vppinfra/memcpy_sse3.h b/src/vppinfra/memcpy_sse3.h
index f61396c8922..5e4bf7d79c5 100644
--- a/src/vppinfra/memcpy_sse3.h
+++ b/src/vppinfra/memcpy_sse3.h
@@ -222,9 +222,9 @@ clib_memcpy (void *dst, const void *src, size_t n)
return ret;
}
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
if (n <= 32)
{
clib_mov16 ((u8 *) dst, (const u8 *) src);
@@ -295,27 +295,31 @@ clib_memcpy (void *dst, const void *src, size_t n)
return ret;
}
- /**
- * Make store aligned when copy size exceeds 512 bytes,
- * and make sure the first 15 bytes are copied, because
- * unaligned copy functions require up to 15 bytes
- * backwards access.
- */
- dstofss = 16 - ((uword) dst & 0x0F) + 16;
- n -= dstofss;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
+ /**
+ * Make store aligned when copy size exceeds 512 bytes,
+ * and make sure the first 15 bytes are copied, because
+ * unaligned copy functions require up to 15 bytes
+ * backwards access.
+ */
+ dstofss = (uword) dst & 0x0F;
+ if (dstofss > 0)
+ {
+ dstofss = 16 - dstofss + 16;
+ n -= dstofss;
+ clib_mov32 ((u8 *) dst, (const u8 *) src);
+ src = (const u8 *) src + dstofss;
+ dst = (u8 *) dst + dstofss;
+ }
srcofs = ((uword) src & 0x0F);
- /**
- * For aligned copy
- */
+ /**
+ * For aligned copy
+ */
if (srcofs == 0)
{
- /**
- * Copy 256-byte blocks
- */
+ /**
+ * Copy 256-byte blocks
+ */
for (; n >= 256; n -= 256)
{
clib_mov256 ((u8 *) dst, (const u8 *) src);
@@ -323,20 +327,20 @@ clib_memcpy (void *dst, const void *src, size_t n)
src = (const u8 *) src + 256;
}
- /**
- * Copy whatever left
- */
+ /**
+ * Copy whatever left
+ */
goto COPY_BLOCK_255_BACK15;
}
- /**
- * For copy with unaligned load
- */
+ /**
+ * For copy with unaligned load
+ */
CLIB_MVUNALIGN_LEFT47 (dst, src, n, srcofs);
- /**
- * Copy whatever left
- */
+ /**
+ * Copy whatever left
+ */
goto COPY_BLOCK_64_BACK15;
}
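
The functional change in the second hunk is the head-alignment step taken
before the 512+ byte copy loop. The old code always computed
dstofss = 16 - ((uword) dst & 0x0F) + 16 and performed a 32-byte head copy,
even when dst was already 16-byte aligned (where the old expression yields
32). The patched code computes the misalignment first and skips the head
copy entirely on the aligned path. Below is a minimal standalone sketch of
the new offset computation, assuming only the 16-byte store-alignment
target used by memcpy_sse3.h; the head_offset helper is hypothetical, for
illustration, and is not part of the file:

#include <stdio.h>
#include <stdint.h>

typedef uintptr_t uword;

/* Hypothetical helper mirroring the patched logic: the number of head
 * bytes to consume (covered by one clib_mov32) so that subsequent stores
 * are 16-byte aligned. Returns 0 when dst is already aligned, in which
 * case the patched code performs no head copy at all. */
static uword
head_offset (const void *dst)
{
  uword dstofss = (uword) dst & 0x0F;	/* misalignment of dst */
  if (dstofss > 0)
    dstofss = 16 - dstofss + 16;	/* 17..31 bytes, spans the misaligned head */
  return dstofss;
}

int
main (void)
{
  /* Walk every possible low nibble of a destination pointer. */
  for (uword i = 0; i < 16; i++)
    printf ("dst & 0xF == %2lu -> head copy %2lu bytes\n",
	    (unsigned long) i, (unsigned long) head_offset ((const void *) i));
  return 0;
}

With the old unconditional expression, an already-aligned destination still
had n reduced by 32 and went through the 32-byte head copy; the conditional
form matches the DPDK code this patch syncs against and avoids that extra
head handling when dst is already aligned.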