aboutsummaryrefslogtreecommitdiffstats
path: root/vppinfra
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-05-11 23:07:18 +0200
committerDamjan Marion <damarion@cisco.com>2016-05-19 18:14:38 +0200
commit1c80e831b728ab378949714d5059a0b5b1822a0a (patch)
treee4f3ecfee143f7dee0e9905570d41ca3ee345c83 /vppinfra
parent82e29c455833b5b12e04c89d2dec1106b499e6b0 (diff)
Add support for multiple microarchitectures in single binary
* compiler -march= parameter is changed from native to corei7 so code is always genereted with instructions which are available on the Nehalem microarchitecture (up to SSE4.2) * compiler -mtune= parameter is added so code is optimized for corei7-avx which equals to Sandy Bridge microarchitecture * set of macros is added which allows run-time detection of available cpu instructions (e.g. clib_cpu_supports_avx()) * set of macros is added which allows us to clone graph node funcitons where cloned function is optmized for different microarchitecture Those macros are using following attributes: __attribute__((flatten)) __attribute__((target("arch=core-avx2))) I.e. If applied to foo_node_fn() macro will generate cloned functions foo_node_fn_avx2() and foo_node_fn_avx512() (future) It will also generate function void * foo_node_fn_multiarch_select() which detects available instruction set and returns pointer to the best matching function clone. Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vppinfra')
-rw-r--r--vppinfra/vppinfra/cpu.c23
-rw-r--r--vppinfra/vppinfra/cpu.h78
2 files changed, 95 insertions, 6 deletions
diff --git a/vppinfra/vppinfra/cpu.c b/vppinfra/vppinfra/cpu.c
index f2dbaf1f7d8..9008ee3dd89 100644
--- a/vppinfra/vppinfra/cpu.c
+++ b/vppinfra/vppinfra/cpu.c
@@ -16,10 +16,6 @@
#include <vppinfra/format.h>
#include <vppinfra/cpu.h>
-#if __x86_64__
-#include <cpuid.h>
-#endif
-
#define foreach_x86_cpu_uarch \
_(0x06, 0x4f, "Broadwell", "Broadwell-EP/EX") \
_(0x06, 0x3d, "Broadwell", "Broadwell") \
@@ -108,4 +104,21 @@ format_cpu_model_name (u8 * s, va_list * args)
#else /* ! __x86_64__ */
return format (s, "unknown");
#endif
-} \ No newline at end of file
+}
+
+u8 *
+format_cpu_flags (u8 * s, va_list * args)
+{
+#if __x86_64__
+#define _(flag, func, reg, bit) \
+ if (clib_cpu_supports_ ## flag()) \
+ s = format (s, #flag " ");
+ foreach_x86_64_flags
+ return s;
+#undef _
+#else /* ! __x86_64__ */
+ return format (s, "unknown");
+#endif
+}
+
+
diff --git a/vppinfra/vppinfra/cpu.h b/vppinfra/vppinfra/cpu.h
index 79cdf74f84b..961af709a63 100644
--- a/vppinfra/vppinfra/cpu.h
+++ b/vppinfra/vppinfra/cpu.h
@@ -16,7 +16,83 @@
#ifndef included_clib_cpu_h
#define included_clib_cpu_h
+#include <vppinfra/format.h>
+
+/*
+ * multiarchitecture support. Adding new entry will produce
+ * new graph node function variant optimized for specific cpu
+ * microarchitecture.
+ * Order is important for runtime selection, as 1st match wins...
+ */
+
+#if __x86_64__ && CLIB_DEBUG == 0
+#define foreach_march_variant(macro, x) \
+ macro(avx2, x, "arch=core-avx2")
+#else
+#define foreach_march_variant(macro, x)
+#endif
+
+
+#if __GNUC__ > 4 && !__clang__
+#define CLIB_CPU_OPTIMIZED __attribute__ ((optimize ("tree-vectorize")))
+#else
+#define CLIB_CPU_OPTIMIZED
+#endif
+
+
+#define CLIB_MULTIARCH_ARCH_CHECK(arch, fn, tgt) \
+ if (clib_cpu_supports_ ## arch()) \
+ return & fn ## _ ##arch;
+
+#define CLIB_MULTIARCH_SELECT_FN(fn,...) \
+ __VA_ARGS__ void * fn ## _multiarch_select(void) \
+{ \
+ foreach_march_variant(CLIB_MULTIARCH_ARCH_CHECK, fn) \
+ return & fn; \
+}
+
+#if __x86_64__
+#include "cpuid.h"
+
+#define foreach_x86_64_flags \
+_ (sse3, 1, ecx, 0) \
+_ (ssse3, 1, ecx, 9) \
+_ (sse41, 1, ecx, 19) \
+_ (sse42, 1, ecx, 20) \
+_ (avx, 1, ecx, 28) \
+_ (avx2, 7, ebx, 5) \
+_ (avx512f, 7, ebx, 16) \
+_ (aes, 1, ecx, 25) \
+_ (sha, 7, ebx, 29)
+
+static inline int
+clib_get_cpuid(const u32 lev, u32 * eax, u32 *ebx, u32 * ecx, u32 * edx)
+{
+ if ((u32) __get_cpuid_max (0x80000000 & lev, 0) < lev)
+ return 0;
+ if (lev == 7)
+ __cpuid_count(lev, 0, *eax, *ebx, *ecx, *edx);
+ else
+ __cpuid(lev, *eax, *ebx, *ecx, *edx);
+ return 1;
+}
+
+
+#define _(flag, func, reg, bit) \
+static inline int \
+clib_cpu_supports_ ## flag() \
+{ \
+ u32 __attribute__((unused)) eax, ebx = 0, ecx = 0, edx = 0; \
+ clib_get_cpuid (func, &eax, &ebx, &ecx, &edx); \
+ \
+ return ((reg & (1 << bit)) != 0); \
+}
+ foreach_x86_64_flags
+#undef _
+#endif
+
format_function_t format_cpu_uarch;
format_function_t format_cpu_model_name;
+format_function_t format_cpu_flags;
-#endif \ No newline at end of file
+#endif