diff options
author | Damjan Marion <damarion@cisco.com> | 2016-05-11 23:07:18 +0200 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2016-05-19 18:14:38 +0200 |
commit | 1c80e831b728ab378949714d5059a0b5b1822a0a (patch) | |
tree | e4f3ecfee143f7dee0e9905570d41ca3ee345c83 /vnet/vnet/interface.h | |
parent | 82e29c455833b5b12e04c89d2dec1106b499e6b0 (diff) |
Add support for multiple microarchitectures in single binary
* compiler -march= parameter is changed from native to corei7
so code is always genereted with instructions which are available
on the Nehalem microarchitecture (up to SSE4.2)
* compiler -mtune= parameter is added so code is optimized for
corei7-avx which equals to Sandy Bridge microarchitecture
* set of macros is added which allows run-time detection of available
cpu instructions (e.g. clib_cpu_supports_avx())
* set of macros is added which allows us to clone graph node funcitons
where cloned function is optmized for different microarchitecture
Those macros are using following attributes:
__attribute__((flatten))
__attribute__((target("arch=core-avx2)))
I.e. If applied to foo_node_fn() macro will generate cloned
functions foo_node_fn_avx2() and foo_node_fn_avx512() (future)
It will also generate function void * foo_node_fn_multiarch_select()
which detects available instruction set and returns pointer to the
best matching function clone.
Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vnet/vnet/interface.h')
-rw-r--r-- | vnet/vnet/interface.h | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h index 2829a0ccfb3..30dcf276393 100644 --- a/vnet/vnet/interface.h +++ b/vnet/vnet/interface.h @@ -163,6 +163,32 @@ static void __vnet_add_device_class_registration_##x (void) \ } \ __VA_ARGS__ vnet_device_class_t x +#define VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( vlib_main_t * vm, \ + vlib_node_runtime_t * node, \ + vlib_frame_t * frame) \ + { return fn (vm, node, frame); } + +#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ + foreach_march_variant(VLIB_DEVICE_TX_FUNCTION_CLONE_TEMPLATE, fn) + +#if CLIB_DEBUG > 0 +#define VLIB_MULTIARCH_CLONE_AND_SELECT_FN(fn,...) +#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) +#else +#define VLIB_DEVICE_TX_FUNCTION_MULTIARCH(dev, fn) \ + VLIB_DEVICE_TX_FUNCTION_MULTIARCH_CLONE(fn) \ + CLIB_MULTIARCH_SELECT_FN(fn, static inline) \ + static void __attribute__((__constructor__)) \ + __vlib_device_tx_function_multiarch_select_##dev (void) \ + { dev.tx_function = fn ## _multiarch_select(); } +#endif + + /* Layer-2 (e.g. Ethernet) interface class. */ typedef struct _vnet_hw_interface_class { /* Index into main vector. */ |