summaryrefslogtreecommitdiffstats
path: root/src/vppinfra
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2017-11-10 21:55:45 +0100
committer Dave Barach <openvpp@barachs.net> 2017-11-13 16:14:50 +0000
commit 04f3db3847d242857b9d9d858bcdca538a1be7d7 (patch)
tree e0304ad2a3986698d62e9a164f5a0a28565cef01 /src/vppinfra
parent 2d8bf304230102a6d9b312b98315418617798175 (diff)
dpdk: introduce AVX512 variants of node functions
Change-Id: If581feca0d51d0420c971801aecdf9250c671b36 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra')
-rw-r--r-- src/vppinfra/clib.h 5
-rw-r--r-- src/vppinfra/cpu.h 7
-rw-r--r-- src/vppinfra/string.h 65
3 files changed, 77 insertions, 0 deletions
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
index fbb2a21c6b9..33db3b203f9 100644
--- a/src/vppinfra/clib.h
+++ b/src/vppinfra/clib.h
@@ -78,6 +78,11 @@
#define CLIB_PACKED(x) x __attribute__ ((packed))
#define CLIB_UNUSED(x) x __attribute__ ((unused))
+#define __clib_unused __attribute__ ((unused)) /* entity may be unused: no warning */
+#define __clib_weak __attribute__ ((weak)) /* emit as a weak symbol */
+#define __clib_packed __attribute__ ((packed)) /* no padding between members */
+#define __clib_constructor __attribute__ ((constructor)) /* run before main () */
+
#define never_inline __attribute__ ((__noinline__))
#if CLIB_DEBUG > 0
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 9c149f3fa2a..75b01e606f3 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -51,6 +51,13 @@
return & fn; \
}
+#ifdef CLIB_MULTIARCH_VARIANT
+#define __CLIB_MULTIARCH_FN(a,b) a##_##b /* token-paste: a_b */
+#define _CLIB_MULTIARCH_FN(a,b) __CLIB_MULTIARCH_FN(a,b) /* extra level so a and b are macro-expanded before pasting */
+#define CLIB_MULTIARCH_FN(fn) _CLIB_MULTIARCH_FN(fn,CLIB_MULTIARCH_VARIANT) /* yields fn_<value of CLIB_MULTIARCH_VARIANT> */
+#else
+#define CLIB_MULTIARCH_FN(fn) fn /* no variant selected: name is left unchanged */
+#endif
#define foreach_x86_64_flags \
_ (sse3, 1, ecx, 0) \
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 69a99a3f0ce..914f6a7bbc4 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -72,6 +72,71 @@ void clib_memswap (void *_a, void *_b, uword bytes);
#define clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
+/*
+ * Copy the 64 bytes at s to each of d0, d1, d2 and d3 (typically one
+ * cacheline fanned out in a quad-loop). With CLIB_HAVE_VEC512/256/128 the
+ * source is read as 1/2/4 consecutive vectors (64/32/16 bytes each) and then
+ * stored to every destination; otherwise clib_memcpy is called four times.
+ */
+
+static_always_inline void
+clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
+{
+#if defined (CLIB_HAVE_VEC512)
+ u8x64 __attribute__ ((aligned (1))) r0 = *(((u8x64 *) s) + 0);
+
+ *(((u8x64 *) d0) + 0) = r0;
+ *(((u8x64 *) d1) + 0) = r0;
+ *(((u8x64 *) d2) + 0) = r0;
+ *(((u8x64 *) d3) + 0) = r0;
+#elif defined (CLIB_HAVE_VEC256)
+ u8x32 __attribute__ ((aligned (1))) r0 = *(((u8x32 *) s) + 0);
+ u8x32 __attribute__ ((aligned (1))) r1 = *(((u8x32 *) s) + 1);
+
+ *(((u8x32 *) d0) + 0) = r0;
+ *(((u8x32 *) d0) + 1) = r1;
+
+ *(((u8x32 *) d1) + 0) = r0;
+ *(((u8x32 *) d1) + 1) = r1;
+
+ *(((u8x32 *) d2) + 0) = r0;
+ *(((u8x32 *) d2) + 1) = r1;
+
+ *(((u8x32 *) d3) + 0) = r0;
+ *(((u8x32 *) d3) + 1) = r1;
+#elif defined (CLIB_HAVE_VEC128)
+ u8x16 __attribute__ ((aligned (1))) r0 = *(((u8x16 *) s) + 0);
+ u8x16 __attribute__ ((aligned (1))) r1 = *(((u8x16 *) s) + 1);
+ u8x16 __attribute__ ((aligned (1))) r2 = *(((u8x16 *) s) + 2);
+ u8x16 __attribute__ ((aligned (1))) r3 = *(((u8x16 *) s) + 3);
+
+ *(((u8x16 *) d0) + 0) = r0;
+ *(((u8x16 *) d0) + 1) = r1;
+ *(((u8x16 *) d0) + 2) = r2;
+ *(((u8x16 *) d0) + 3) = r3;
+
+ *(((u8x16 *) d1) + 0) = r0;
+ *(((u8x16 *) d1) + 1) = r1;
+ *(((u8x16 *) d1) + 2) = r2;
+ *(((u8x16 *) d1) + 3) = r3;
+
+ *(((u8x16 *) d2) + 0) = r0;
+ *(((u8x16 *) d2) + 1) = r1;
+ *(((u8x16 *) d2) + 2) = r2;
+ *(((u8x16 *) d2) + 3) = r3;
+
+ *(((u8x16 *) d3) + 0) = r0;
+ *(((u8x16 *) d3) + 1) = r1;
+ *(((u8x16 *) d3) + 2) = r2;
+ *(((u8x16 *) d3) + 3) = r3;
+#else
+ clib_memcpy (d0, s, 64);
+ clib_memcpy (d1, s, 64);
+ clib_memcpy (d2, s, 64);
+ clib_memcpy (d3, s, 64);
+#endif
+}
+
#endif /* included_clib_string_h */
/*