path: root/src/vppinfra/string.h
author    Damjan Marion <damarion@cisco.com>    2019-01-18 19:56:09 +0100
committer Florin Coras <florin.coras@gmail.com> 2019-01-18 22:00:27 +0000
commit    e58041f242bf4bd120ecc9619b88348d80b94c17 (patch)
tree      f163418c56ef3e76074685900b00b044cc1b49bd /src/vppinfra/string.h
parent    865872ebdb2bbaf3f157e2a9889405b84114d2eb (diff)
deprecate clib_memcpy64_x4
Storing the buffer in a local template seems to be a better option.

Change-Id: I1a2fdd68cb956f99a5b36d2cd810fc623e089bcf
Signed-off-by: Damjan Marion <damarion@cisco.com>
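The replacement pattern is not part of this diff, so the sketch below is a hedged illustration only. Assuming a caller that previously fanned one 64-byte cacheline out to four destinations with clib_memcpy64_x4, the "local template" approach the commit message refers to keeps the source in a local copy and issues ordinary clib_memcpy_fast calls (the name copy_template_x4 and the variable tmpl are hypothetical; clib_memcpy_fast is the generic copy helper this header already provides):

static_always_inline void
copy_template_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
  /* hypothetical sketch, not code from this patch: keep the 64-byte source
     in a local template so the compiler can hold it in registers, then do
     plain per-destination copies instead of a hand-written SIMD fan-out */
  u8 tmpl[64];

  clib_memcpy_fast (tmpl, s, 64);
  clib_memcpy_fast (d0, tmpl, 64);
  clib_memcpy_fast (d1, tmpl, 64);
  clib_memcpy_fast (d2, tmpl, 64);
  clib_memcpy_fast (d3, tmpl, 64);
}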
Diffstat (limited to 'src/vppinfra/string.h')
-rw-r--r--  src/vppinfra/string.h | 68 -
1 file changed, 0 insertions(+), 68 deletions(-)
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 42f7890f3d0..d9cd8fe1af9 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -213,74 +213,6 @@ memset_s_inline (void *s, rsize_t smax, int c, rsize_t n)
*/
#define clib_memset(s,c,n) memset_s_inline(s,n,c,n)
-/*
- * Copy 64 bytes of data to 4 destinations
- * this function is typically used in quad-loop case when whole cacheline
- * needs to be copied to 4 different places. First it reads whole cacheline
- * to 1/2/4 SIMD registers and then it writes data to 4 destinations.
- */
-
-static_always_inline void
-clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
-{
-#if defined (__AVX512F__)
- __m512i r0 = _mm512_loadu_si512 (s);
-
- _mm512_storeu_si512 (d0, r0);
- _mm512_storeu_si512 (d1, r0);
- _mm512_storeu_si512 (d2, r0);
- _mm512_storeu_si512 (d3, r0);
-
-#elif defined (__AVX2__)
- __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
- __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));
-
- _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
- _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);
-
- _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
- _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);
-
- _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
- _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);
-
- _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
- _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);
-
-#elif defined (__SSSE3__)
- __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
- __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
- __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
- __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));
-
- _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
- _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
- _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
- _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);
-
- _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
- _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
- _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
- _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);
-
- _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
- _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
- _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
- _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);
-
- _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
- _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
- _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
- _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);
-
-#else
- clib_memcpy_fast (d0, s, 64);
- clib_memcpy_fast (d1, s, 64);
- clib_memcpy_fast (d2, s, 64);
- clib_memcpy_fast (d3, s, 64);
-#endif
-}
-
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
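The comment on the removed function above mentions the typical quad-loop call pattern. As a hedged usage sketch (the names n, dst and tmpl are hypothetical and this is not code from this repository), such a call site, with its scalar tail, might have looked like:

  /* hypothetical quad loop: copy one 64-byte template to four buffers per
     iteration, then fall back to single copies for the remainder */
  while (n >= 4)
    {
      clib_memcpy64_x4 (dst[0], dst[1], dst[2], dst[3], tmpl);
      dst += 4;
      n -= 4;
    }
  while (n)
    {
      clib_memcpy_fast (dst[0], tmpl, 64);
      dst += 1;
      n -= 1;
    }

After this patch, the clib_memcpy64_x4 call in such a loop would be replaced by four per-destination clib_memcpy_fast calls (exactly what the removed #else branch did) or by the local-template pattern sketched under the commit message.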