#if defined (CLIB_HAVE_VEC256)
u8x16_blend ((__m128i) v1, (__m128i) v2, (__m128i) mask);