diff options
author | Steven Luong <sluong@cisco.com> | 2019-08-20 16:58:00 -0700 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2019-09-06 16:07:59 +0000 |
commit | a1876b84e5598fcfad1debe5abb51d152e06a66e (patch) | |
tree | 58e7d58a52b8b0beb85dc99c6071dab4a17f32e1 /src/vnet/bonding/cli.c | |
parent | ffbfe3a2d6aaf4e847a1848c29fc8ce2997ed260 (diff) |
bonding: add weight support for active-backup mode
Not all interfaces have the same characteristics within the bonding group.
For active-backup mode, we should do our best to select the slave that
performs the best as the primary slave. We already did that by preferring
the slave that is local numa. Sometimes, this is not enough. For example,
when all are local numas, the selection is arbitrary. Some slave interfaces
may have higher speed or better qos than the others. But this is hard to
infer.
One rule does not fit all. So we let the operator to optionally specify the
weight for each slave interface. Our primary slave selection rule is now
1. biggest weight
2. is local numa
3. current primary slave (to avoid churn)
4. lowest sw_if_index (for deterministic behavior)
This selection rule only applies to active-backup mode which only one slave
is used for forwarding traffic until it becomes unreachable. At that time,
the next "best" slave candidate is automatically promoted. The slaves are
sorted according to the preference rule when they are up. So there is no need
to find the next best candidate when the primary slave goes down.
Another good thing about this rule is when the down slave comes back up, it
is selected as the primary slave again unless there is indeed a "better"
slave than this down slave that were added during that period.
To set the weight for the slave interface, do this after the interface is
enslaved
set interface bond <interface-name> weight <value>
Type: feature
Signed-off-by: Steven Luong <sluong@cisco.com>
Change-Id: I59ced6d20ce1dec532e667dbe1afd1b4243e04f9
Diffstat (limited to 'src/vnet/bonding/cli.c')
-rw-r--r-- | src/vnet/bonding/cli.c | 274 |
1 files changed, 198 insertions, 76 deletions
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index 4e0d30aa598..2acc670a33d 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -29,8 +29,6 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_if_t *bif; int i; uword p; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hw; u8 switching_active = 0; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); @@ -40,12 +38,10 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) { - if (sif->sw_if_index == bif->sw_if_index_working) - { - switching_active = 1; - if (bif->mode == BOND_MODE_ACTIVE_BACKUP) - bif->is_local_numa = 0; - } + if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && (i == 0) && + (vec_len (bif->active_slaves) > 1)) + /* deleting the active slave for active-backup */ + switching_active = 1; vec_del1 (bif->active_slaves, i); hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index); if (sif->lacp_enabled && bif->numa_only) @@ -64,37 +60,9 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) } /* We get a new slave just becoming active */ - if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active) - { - if ((vec_len (bif->active_slaves) >= 1)) - { - /* scan all slaves and try to find the first slave with local numa node. */ - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, i); - hw = vnet_get_sup_hw_interface (vnm, p); - if (vm->numa_node == hw->numa_node) - { - bif->sw_if_index_working = p; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, - bif->hw_if_index); - break; - } - } - } - - /* No local numa node is found in the active slave set. Use the first slave */ - if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1)) - { - p = *vec_elt_at_index (bif->active_slaves, 0); - bif->sw_if_index_working = p; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - } - } + if (switching_active) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); clib_spinlock_unlock_if_init (&bif->lockp); if (bif->mode == BOND_MODE_LACP) @@ -102,6 +70,71 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) [sif->sw_if_index], sif->actor.state); } +/* + * return 1 if s2 is preferred. + * return -1 if s1 is preferred. + */ +static int +bond_slave_sort (void *a1, void *a2) +{ + u32 *s1 = a1; + u32 *s2 = a2; + slave_if_t *sif1 = bond_get_slave_by_sw_if_index (*s1); + slave_if_t *sif2 = bond_get_slave_by_sw_if_index (*s2); + bond_if_t *bif; + + ASSERT (sif1); + ASSERT (sif2); + /* + * sort entries according to preference rules: + * 1. biggest weight + * 2. numa-node + * 3. current active slave (to prevent churning) + * 4. lowest sw_if_index (for deterministic behavior) + * + */ + if (sif2->weight > sif1->weight) + return 1; + if (sif2->weight < sif1->weight) + return -1; + else + { + if (sif2->is_local_numa > sif1->is_local_numa) + return 1; + if (sif2->is_local_numa < sif1->is_local_numa) + return -1; + else + { + bif = bond_get_master_by_dev_instance (sif1->bif_dev_instance); + /* Favor the current active slave to avoid churning */ + if (bif->active_slaves[0] == sif2->sw_if_index) + return 1; + if (bif->active_slaves[0] == sif1->sw_if_index) + return -1; + /* go for the tiebreaker as the last resort */ + if (sif1->sw_if_index > sif2->sw_if_index) + return 1; + if (sif1->sw_if_index < sif2->sw_if_index) + return -1; + else + ASSERT (0); + } + } + return 0; +} + +static void +bond_sort_slaves (bond_if_t * bif) +{ + bond_main_t *bm = &bond_main; + u32 old_active = bif->active_slaves[0]; + + vec_sort_with_function (bif->active_slaves, bond_slave_sort); + if (old_active != bif->active_slaves[0]) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); +} + void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) { @@ -109,8 +142,6 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_main_t *bm = &bond_main; vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); - int i; - uword p; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); @@ -127,43 +158,17 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bif->n_numa_slaves++; } else - { - vec_add1 (bif->active_slaves, sif->sw_if_index); - } + vec_add1 (bif->active_slaves, sif->sw_if_index); - /* First slave becomes active? */ - if ((vec_len (bif->active_slaves) == 1) && - (bif->mode == BOND_MODE_ACTIVE_BACKUP)) + sif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) { - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - } - else if ((vec_len (bif->active_slaves) > 1) - && (bif->mode == BOND_MODE_ACTIVE_BACKUP) - && bif->is_local_numa == 0) - { - if (vm->numa_node == hw->numa_node) - { - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, 0); - if (p == sif->sw_if_index) - break; - - vec_del1 (bif->active_slaves, 0); - hash_unset (bif->active_slave_by_sw_if_index, p); - vec_add1 (bif->active_slaves, p); - hash_set (bif->active_slave_by_sw_if_index, p, p); - } - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - - } + if (vec_len (bif->active_slaves) == 1) + /* First slave becomes active? */ + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + else + bond_sort_slaves (bif); } } clib_spinlock_unlock_if_init (&bif->lockp); @@ -238,6 +243,8 @@ bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, slaveif->sw_if_index = sif->sw_if_index; slaveif->is_passive = sif->is_passive; slaveif->is_long_timeout = sif->is_long_timeout; + slaveif->is_local_numa = sif->is_local_numa; + slaveif->weight = sif->weight; } } *out_slaveifs = r_slaveifs; @@ -862,6 +869,14 @@ show_bond_details (vlib_main_t * vm) { vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnet_get_main (), *sw_if_index); + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) + { + slave_if_t *sif = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif) + vlib_cli_output (vm, " weight: %u, is_local_numa: %u, " + "sw_if_index: %u", sif->weight, + sif->is_local_numa, sif->sw_if_index); + } } vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves)); vec_foreach (sw_if_index, bif->slaves) @@ -910,6 +925,113 @@ VLIB_CLI_COMMAND (show_bond_command, static) = { }; /* *INDENT-ON* */ +void +bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args) +{ + slave_if_t *sif; + bond_if_t *bif; + vnet_main_t *vnm; + u32 old_weight; + + sif = bond_get_slave_by_sw_if_index (args->sw_if_index); + if (!sif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "Interface not enslaved"); + return; + } + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + if (!bif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "bond interface not found"); + return; + } + if (bif->mode != BOND_MODE_ACTIVE_BACKUP) + { + args->rv = VNET_API_ERROR_INVALID_ARGUMENT; + args->error = + clib_error_return (0, "Weight valid for active-backup only"); + return; + } + + old_weight = sif->weight; + sif->weight = args->weight; + vnm = vnet_get_main (); + /* + * No need to sort the list if the affected slave is not up (not in active + * slave set), active slave count is 1, or the current slave is already the + * primary slave and new weight > old weight. + */ + if (!vnet_sw_interface_is_up (vnm, sif->sw_if_index) || + (vec_len (bif->active_slaves) == 1) || + ((bif->active_slaves[0] == sif->sw_if_index) && + (sif->weight >= old_weight))) + return; + + bond_sort_slaves (bif); +} + +static clib_error_t * +bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bond_set_intf_weight_args_t args = { 0 }; + u32 sw_if_index = (u32) ~ 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + u8 weight_enter = 0; + u32 weight = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "weight %u", &weight)) + weight_enter = 1; + else + { + clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + break; + } + } + + unformat_free (line_input); + if (sw_if_index == (u32) ~ 0) + { + args.rv = VNET_API_ERROR_INVALID_INTERFACE; + clib_error_return (0, "Interface name is invalid!"); + } + if (weight_enter == 0) + { + args.rv = VNET_API_ERROR_INVALID_ARGUMENT; + clib_error_return (0, "weight missing"); + } + + args.sw_if_index = sw_if_index; + args.weight = weight; + bond_set_intf_weight (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = { + .path = "set interface bond", + .short_help = "set interface bond <interface> | sw_if_index <idx>" + " weight <value>", + .function = bond_set_intf_cmd, +}; +/* *INDENT-ON* */ + clib_error_t * bond_cli_init (vlib_main_t * vm) { |