aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/devices/dpdk/node.c
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-05-11 23:07:18 +0200
committerDamjan Marion <damarion@cisco.com>2016-05-19 18:14:38 +0200
commit1c80e831b728ab378949714d5059a0b5b1822a0a (patch)
treee4f3ecfee143f7dee0e9905570d41ca3ee345c83 /vnet/vnet/devices/dpdk/node.c
parent82e29c455833b5b12e04c89d2dec1106b499e6b0 (diff)
Add support for multiple microarchitectures in single binary
* compiler -march= parameter is changed from native to corei7 so code is always genereted with instructions which are available on the Nehalem microarchitecture (up to SSE4.2) * compiler -mtune= parameter is added so code is optimized for corei7-avx which equals to Sandy Bridge microarchitecture * set of macros is added which allows run-time detection of available cpu instructions (e.g. clib_cpu_supports_avx()) * set of macros is added which allows us to clone graph node funcitons where cloned function is optmized for different microarchitecture Those macros are using following attributes: __attribute__((flatten)) __attribute__((target("arch=core-avx2))) I.e. If applied to foo_node_fn() macro will generate cloned functions foo_node_fn_avx2() and foo_node_fn_avx512() (future) It will also generate function void * foo_node_fn_multiarch_select() which detects available instruction set and returns pointer to the best matching function clone. Change-Id: I2dce0ac92a5ede95fcb56f47f3d1f3c4c040bac0 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vnet/vnet/devices/dpdk/node.c')
-rw-r--r--vnet/vnet/devices/dpdk/node.c50
1 files changed, 45 insertions, 5 deletions
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index a7590a5c566..ca94511b411 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -251,6 +251,8 @@ VLIB_REGISTER_NODE (handoff_dispatch_node) = {
},
};
+VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
+
clib_error_t *handoff_dispatch_init (vlib_main_t *vm)
{
handoff_dispatch_main_t * mp = &handoff_dispatch_main;
@@ -488,7 +490,8 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
dpdk_device_t * xd,
vlib_node_runtime_t * node,
u32 cpu_index,
- u16 queue_id)
+ u16 queue_id,
+ int use_efd)
{
u32 n_buffers;
u32 next_index = DPDK_RX_NEXT_ETHERNET_INPUT;
@@ -510,7 +513,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
if (n_buffers == 0)
{
/* check if EFD (dpdk) is enabled */
- if (PREDICT_FALSE(dm->efd.enabled))
+ if (PREDICT_FALSE(use_efd && dm->efd.enabled))
{
/* reset a few stats */
xd->efd_agent.last_poll_time = 0;
@@ -546,7 +549,7 @@ static inline u32 dpdk_device_input ( dpdk_main_t * dm,
/* Check for congestion if EFD (Early-Fast-Discard) is enabled
* in any mode (e.g. dpdk, monitor, or drop_all)
*/
- if (PREDICT_FALSE(dm->efd.enabled))
+ if (PREDICT_FALSE(use_efd && dm->efd.enabled))
{
/* update EFD counters */
dpdk_efd_update_counters(xd, n_buffers, dm->efd.enabled);
@@ -793,7 +796,7 @@ dpdk_input (vlib_main_t * vm,
{
xd = vec_elt_at_index(dm->devices, dq->device);
ASSERT(dq->queue_id == 0);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0);
+ n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0);
}
VIRL_SPEED_LIMIT()
@@ -818,7 +821,7 @@ dpdk_input_rss (vlib_main_t * vm,
vec_foreach (dq, dm->devices_by_cpu[cpu_index])
{
xd = vec_elt_at_index(dm->devices, dq->device);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
+ n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0);
}
VIRL_SPEED_LIMIT()
@@ -826,6 +829,32 @@ dpdk_input_rss (vlib_main_t * vm,
return n_rx_packets;
}
+uword
+dpdk_input_efd (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ dpdk_main_t * dm = &dpdk_main;
+ dpdk_device_t * xd;
+ uword n_rx_packets = 0;
+ dpdk_device_and_queue_t * dq;
+ u32 cpu_index = os_get_cpu_number();
+
+ /*
+ * Poll all devices on this cpu for input/interrupts.
+ */
+ vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+ {
+ xd = vec_elt_at_index(dm->devices, dq->device);
+ n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1);
+ }
+
+ VIRL_SPEED_LIMIT()
+
+ return n_rx_packets;
+}
+
+
VLIB_REGISTER_NODE (dpdk_input_node) = {
.function = dpdk_input,
.type = VLIB_NODE_TYPE_INPUT,
@@ -850,6 +879,17 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
},
};
+
+/* handle dpdk_input_rss alternative function */
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input)
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss)
+VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd)
+
+/* this macro defines dpdk_input_rss_multiarch_select() */
+CLIB_MULTIARCH_SELECT_FN(dpdk_input);
+CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss);
+CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd);
+
/*
* Override the next nodes for the dpdk input nodes.
* Must be invoked prior to VLIB_INIT_FUNCTION calls.