diff options
author | Zhiyong Yang <zhiyong.yang@intel.com> | 2019-05-15 04:25:20 -0400 |
---|---|---|
committer | John Lo <loj@cisco.com> | 2019-05-31 00:55:27 +0000 |
commit | 6865d3c2ac01d78a2f313c4fb3e74cf409328e58 (patch) | |
tree | dd836b0201e21d93a5198c818cf06c7531d643f6 | |
parent | 63d4d1d6e488d9b985000bdeed0c9c8f6854236a (diff) |
bonding: add support for numa awareness
This patch enables bonding NUMA awareness on multi-socket
servers working in active-backup mode.
VPP adds the capability to automatically prefer a slave
on the local NUMA node in order to reduce the load on the
QPI bus and improve overall system performance in multi-socket
use cases. Users do not need to perform any extra operations;
usage is unchanged.
Change-Id: Iec267375fc399a9a0c0a7dca649fadb994d36671
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
-rw-r--r-- | src/plugins/dpdk/device/init.c | 1 | ||||
-rw-r--r-- | src/vnet/bonding/cli.c | 92 | ||||
-rw-r--r-- | src/vnet/bonding/node.h | 5 | ||||
-rw-r--r-- | src/vnet/interface.h | 4 |
4 files changed, 90 insertions, 12 deletions
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index fccefb17b4d..2e4c8a22743 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -732,6 +732,7 @@ dpdk_lib_init (dpdk_main_t * dm) { hi->max_packet_bytes = mtu; hi->max_supported_packet_bytes = max_rx_frame; + hi->numa_node = xd->cpu_socket; } if (dm->conf->no_tx_checksum_offload == 0) diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index bccbb2c036e..cb344c611c2 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -28,6 +28,8 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_if_t *bif; int i; uword p; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw; u8 switching_active = 0; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); @@ -37,22 +39,53 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) { - /* Are we disabling the very 1st slave? */ - if (sif->sw_if_index == *vec_elt_at_index (bif->active_slaves, 0)) - switching_active = 1; - + if (sif->sw_if_index == bif->sw_if_index_working) + { + switching_active = 1; + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) + bif->is_local_numa = 0; + } vec_del1 (bif->active_slaves, i); hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index); - - /* We got a new slave just becoming active? */ - if ((vec_len (bif->active_slaves) >= 1) && - (bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active) - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); break; } } + + /* We get a new slave just becoming active */ + if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active) + { + if ((vec_len (bif->active_slaves) >= 1)) + { + /* scan all slaves and try to find the first slave with local numa node. 
*/ + vec_foreach_index (i, bif->active_slaves) + { + p = *vec_elt_at_index (bif->active_slaves, i); + hw = vnet_get_sup_hw_interface (vnm, p); + if (vm->numa_node == hw->numa_node) + { + bif->sw_if_index_working = p; + bif->is_local_numa = 1; + vlib_process_signal_event (bm->vlib_main, + bond_process_node.index, + BOND_SEND_GARP_NA, + bif->hw_if_index); + break; + } + } + } + + /* No local numa node is found in the active slave set. Use the first slave */ + if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1)) + { + p = *vec_elt_at_index (bif->active_slaves, 0); + bif->sw_if_index_working = p; + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + } + } clib_spinlock_unlock_if_init (&bif->lockp); + + return; } void @@ -60,6 +93,10 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) { bond_if_t *bif; bond_main_t *bm = &bond_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); + int i; + uword p; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); @@ -72,10 +109,41 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) /* First slave becomes active? */ if ((vec_len (bif->active_slaves) == 1) && (bif->mode == BOND_MODE_ACTIVE_BACKUP)) - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); + { + bif->sw_if_index_working = sif->sw_if_index; + bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 
1 : 0; + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + } + else if ((vec_len (bif->active_slaves) > 1) + && (bif->mode == BOND_MODE_ACTIVE_BACKUP) + && bif->is_local_numa == 0) + { + if (vm->numa_node == hw->numa_node) + { + vec_foreach_index (i, bif->active_slaves) + { + p = *vec_elt_at_index (bif->active_slaves, 0); + if (p == sif->sw_if_index) + break; + + vec_del1 (bif->active_slaves, 0); + hash_unset (bif->active_slave_by_sw_if_index, p); + vec_add1 (bif->active_slaves, p); + hash_set (bif->active_slave_by_sw_if_index, p, p); + } + bif->sw_if_index_working = sif->sw_if_index; + bif->is_local_numa = 1; + vlib_process_signal_event (bm->vlib_main, + bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + + } + } } clib_spinlock_unlock_if_init (&bif->lockp); + + return; } int diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 41e945a05f8..b046f989f20 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -157,6 +157,11 @@ typedef struct u8 mode; u8 lb; + /* This flag works for active-backup mode only + and marks if the working port is local numa. */ + u8 is_local_numa; + /* current working sw_if_index in active-bakeup mode. */ + u32 sw_if_index_working; /* the last slave index for the rr lb */ u32 lb_rr_last_index; diff --git a/src/vnet/interface.h b/src/vnet/interface.h index c32311c86ac..d87de60bceb 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -565,6 +565,10 @@ typedef struct vnet_hw_interface_t /* device input device_and_queue runtime index */ uword *dq_runtime_index_by_queue; + /* numa node that hardware device connects to */ + u8 numa_node; + + u8 padding[3]; } vnet_hw_interface_t; extern vnet_device_class_t vnet_local_interface_device_class; |