diff options
author | Damjan Marion <damarion@cisco.com> | 2019-03-28 16:19:24 +0100 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2019-03-28 17:01:13 +0000 |
commit | 4d3aa07ba34cc571c50046ad7e0575010590334f (patch) | |
tree | 0252fc87805006ea1a60408368026d81bbbfebda | |
parent | 0e967e0998c8c395f1d91ee437137fcb478087da (diff) |
Avoid overwrite in clib_memcpy_le{32,64}
Change-Id: Id4a8b6a31fc3e88af2f075cb97c85d3f9b738d9e
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- | src/vppinfra/string.h | 56 |
1 files changed, 29 insertions, 27 deletions
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index 4755a9868d6..49848e92f39 100644 --- a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -217,63 +217,65 @@ static_always_inline void clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len) { #if defined (CLIB_HAVE_VEC256) - u8x32 s, d; + u8x32 s0, s1, d0, d1; u8x32 mask = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; u8x32 lv = u8x32_splat (len); u8x32 add = u8x32_splat (32); - s = u8x32_load_unaligned (src); - d = u8x32_load_unaligned (dst); - d = u8x32_blend (d, s, u8x32_is_greater (lv, mask)); - u8x32_store_unaligned (d, dst); + s0 = u8x32_load_unaligned (src); + s1 = u8x32_load_unaligned (src + 32); + d0 = u8x32_load_unaligned (dst); + d1 = u8x32_load_unaligned (dst + 32); + + d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask)); + u8x32_store_unaligned (d0, dst); if (max_len <= 32) return; mask += add; - s = u8x32_load_unaligned (src + 32); - d = u8x32_load_unaligned (dst + 32); - d = u8x32_blend (d, s, u8x32_is_greater (lv, mask)); - u8x32_store_unaligned (d, dst + 32); + d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask)); + u8x32_store_unaligned (d1, dst + 32); #elif defined (CLIB_HAVE_VEC128) && !defined (__aarch64__) - u8x16 s, d; + u8x16 s0, s1, s2, s3, d0, d1, d2, d3; u8x16 mask = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; u8x16 lv = u8x16_splat (len); u8x16 add = u8x16_splat (16); - s = u8x16_load_unaligned (src); - d = u8x16_load_unaligned (dst); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst); + s0 = u8x16_load_unaligned (src); + s1 = u8x16_load_unaligned (src + 16); + s2 = u8x16_load_unaligned (src + 32); + s3 = u8x16_load_unaligned (src + 48); + d0 = u8x16_load_unaligned (dst); + d1 = u8x16_load_unaligned (dst + 16); + d2 = u8x16_load_unaligned (dst + 32); + d3 = u8x16_load_unaligned (dst + 48); + + d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, 
mask)); + u8x16_store_unaligned (d0, dst); if (max_len <= 16) return; mask += add; - s = u8x16_load_unaligned (src + 16); - d = u8x16_load_unaligned (dst + 16); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 16); + d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d1, dst + 16); if (max_len <= 32) return; mask += add; - s = u8x16_load_unaligned (src + 32); - d = u8x16_load_unaligned (dst + 32); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 32); + d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d2, dst + 32); mask += add; - s = u8x16_load_unaligned (src + 48); - d = u8x16_load_unaligned (dst + 48); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 48); + d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d3, dst + 48); #else - clib_memcpy_fast (dst, src, len); + memmove (dst, src, len); #endif } |