From 1c80e831b728ab378949714d5059a0b5b1822a0a Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 11 May 2016 23:07:18 +0200 Subject: Add support for multiple microarchitectures in single binary * compiler -march= parameter is changed from native to corei7 so code is always generated with instructions which are available on the Nehalem microarchitecture (up to SSE4.2) * compiler -mtune= parameter is added so code is optimized for corei7-avx, which corresponds to the Sandy Bridge microarchitecture * set of macros is added which allows run-time detection of available CPU instructions (e.g. clib_cpu_supports_avx()) * set of macros is added which allows us to clone graph node functions, where each cloned function is optimized for a different microarchitecture. Those macros use the following attributes: __attribute__((flatten)) __attribute__((target("arch=core-avx2"))) E.g. if applied to foo_node_fn(), the macro will generate cloned functions foo_node_fn_avx2() and foo_node_fn_avx512() (future). It will also generate the function void * foo_node_fn_multiarch_select(), which detects the available instruction set and returns a pointer to the best matching function clone.
Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0 Signed-off-by: Damjan Marion --- vlib/vlib/node.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'vlib') diff --git a/vlib/vlib/node.h b/vlib/vlib/node.h index 2caede6e..9b33a0a3 100644 --- a/vlib/vlib/node.h +++ b/vlib/vlib/node.h @@ -40,6 +40,7 @@ #ifndef included_vlib_node_h #define included_vlib_node_h +#include #include #include #include /* for vlib_trace_filter_t */ @@ -149,6 +150,32 @@ static void __vlib_add_node_registration_##x (void) \ } \ __VA_ARGS__ vlib_node_registration_t x +#if CLIB_DEBUG > 0 +#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn) +#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) +#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) +#else +#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( struct vlib_main_t * vm, \ + struct vlib_node_runtime_t * node, \ + struct vlib_frame_t * frame) \ + { return fn (vm, node, frame); } + +#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ + foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn) + +#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn) \ + VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn) \ + CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ + static void __attribute__((__constructor__)) \ + __vlib_node_function_multiarch_select_##node (void) \ + { node.function = fn ## _multiarch_select(); } +#endif + always_inline vlib_node_registration_t * vlib_node_next_registered (vlib_node_registration_t * c) { -- cgit 1.2.3-korg