diff options
Diffstat (limited to 'src/vnet/bonding')
-rw-r--r-- | src/vnet/bonding/bond.api | 20 | ||||
-rw-r--r-- | src/vnet/bonding/bond_api.c | 22 | ||||
-rw-r--r-- | src/vnet/bonding/cli.c | 274 | ||||
-rw-r--r-- | src/vnet/bonding/node.h | 24 |
4 files changed, 258 insertions, 82 deletions
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api index e699267ccb5..5d9a0563c1a 100644 --- a/src/vnet/bonding/bond.api +++ b/src/vnet/bonding/bond.api @@ -19,7 +19,7 @@ the bonding device driver */ -option version = "1.0.1"; +option version = "1.0.2"; /** \brief Initialize a new bond interface with the given paramters @param client_index - opaque cookie to identify the sender @@ -154,6 +154,8 @@ define sw_interface_slave_dump @param interface_name - name of interface @param is_passve - interface does not initiate the lacp protocol, remote must be active speaker @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout + @param is_local_numa - the slave interface is local numa + @param weight - the weight for the slave interface (active-backup mode only) */ define sw_interface_slave_details { @@ -162,6 +164,22 @@ define sw_interface_slave_details u8 interface_name[64]; u8 is_passive; u8 is_long_timeout; + u8 is_local_numa; + u32 weight; +}; + +/** \brief Interface set bond weight + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - slave interface for which to set the weight + @param weight - weight value to be set for the slave interface +*/ +autoreply define sw_interface_set_bond_weight +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 weight; }; /* diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c index 8e1842367e5..74334b52bf2 100644 --- a/src/vnet/bonding/bond_api.c +++ b/src/vnet/bonding/bond_api.c @@ -47,6 +47,7 @@ _(BOND_CREATE, bond_create) \ _(BOND_DELETE, bond_delete) \ _(BOND_ENSLAVE, bond_enslave) \ +_(SW_INTERFACE_SET_BOND_WEIGHT, sw_interface_set_bond_weight) \ _(BOND_DETACH_SLAVE, bond_detach_slave) \ _(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\ _(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump) @@ -117,6 +118,25 @@ vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp) } static void + vl_api_sw_interface_set_bond_weight_t_handler + (vl_api_sw_interface_set_bond_weight_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bond_set_intf_weight_args_t _a, *ap = &_a; + vl_api_sw_interface_set_bond_weight_reply_t *rmp; + int rv = 0; + + clib_memset (ap, 0, sizeof (*ap)); + + ap->sw_if_index = ntohl (mp->sw_if_index); + ap->weight = ntohl (mp->weight); + + bond_set_intf_weight (vm, ap); + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY); +} + +static void vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp) { vlib_main_t *vm = vlib_get_main (); @@ -200,6 +220,8 @@ bond_send_sw_interface_slave_details (vpe_api_main_t * am, strlen ((const char *) slave_if->interface_name))); mp->is_passive = slave_if->is_passive; mp->is_long_timeout = slave_if->is_long_timeout; + mp->is_local_numa = slave_if->is_local_numa; + mp->weight = htonl (slave_if->weight); mp->context = context; vl_api_send_msg (reg, (u8 *) mp); diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index 4e0d30aa598..2acc670a33d 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -29,8 +29,6 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_if_t *bif; int i; uword p; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hw; u8 switching_active = 0; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); @@ -40,12 +38,10 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) { - if (sif->sw_if_index == bif->sw_if_index_working) - { - switching_active = 1; - if (bif->mode == BOND_MODE_ACTIVE_BACKUP) - bif->is_local_numa = 0; - } + if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && (i == 0) && + (vec_len (bif->active_slaves) > 1)) + /* deleting the active slave for active-backup */ + switching_active = 1; vec_del1 (bif->active_slaves, i); hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index); if (sif->lacp_enabled && bif->numa_only) @@ -64,37 +60,9 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) } /* We get a new slave just becoming active */ - if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active) - { - if ((vec_len (bif->active_slaves) >= 1)) - { - /* scan all slaves and try to find the first slave with local numa node. */ - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, i); - hw = vnet_get_sup_hw_interface (vnm, p); - if (vm->numa_node == hw->numa_node) - { - bif->sw_if_index_working = p; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, - bif->hw_if_index); - break; - } - } - } - - /* No local numa node is found in the active slave set. Use the first slave */ - if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1)) - { - p = *vec_elt_at_index (bif->active_slaves, 0); - bif->sw_if_index_working = p; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - } - } + if (switching_active) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); clib_spinlock_unlock_if_init (&bif->lockp); if (bif->mode == BOND_MODE_LACP) @@ -102,6 +70,71 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) [sif->sw_if_index], sif->actor.state); } +/* + * return 1 if s2 is preferred. + * return -1 if s1 is preferred. + */ +static int +bond_slave_sort (void *a1, void *a2) +{ + u32 *s1 = a1; + u32 *s2 = a2; + slave_if_t *sif1 = bond_get_slave_by_sw_if_index (*s1); + slave_if_t *sif2 = bond_get_slave_by_sw_if_index (*s2); + bond_if_t *bif; + + ASSERT (sif1); + ASSERT (sif2); + /* + * sort entries according to preference rules: + * 1. biggest weight + * 2. numa-node + * 3. current active slave (to prevent churning) + * 4. lowest sw_if_index (for deterministic behavior) + * + */ + if (sif2->weight > sif1->weight) + return 1; + if (sif2->weight < sif1->weight) + return -1; + else + { + if (sif2->is_local_numa > sif1->is_local_numa) + return 1; + if (sif2->is_local_numa < sif1->is_local_numa) + return -1; + else + { + bif = bond_get_master_by_dev_instance (sif1->bif_dev_instance); + /* Favor the current active slave to avoid churning */ + if (bif->active_slaves[0] == sif2->sw_if_index) + return 1; + if (bif->active_slaves[0] == sif1->sw_if_index) + return -1; + /* go for the tiebreaker as the last resort */ + if (sif1->sw_if_index > sif2->sw_if_index) + return 1; + if (sif1->sw_if_index < sif2->sw_if_index) + return -1; + else + ASSERT (0); + } + } + return 0; +} + +static void +bond_sort_slaves (bond_if_t * bif) +{ + bond_main_t *bm = &bond_main; + u32 old_active = bif->active_slaves[0]; + + vec_sort_with_function (bif->active_slaves, bond_slave_sort); + if (old_active != bif->active_slaves[0]) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); +} + void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) { @@ -109,8 +142,6 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_main_t *bm = &bond_main; vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); - int i; - uword p; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); @@ -127,43 +158,17 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bif->n_numa_slaves++; } else - { - vec_add1 (bif->active_slaves, sif->sw_if_index); - } + vec_add1 (bif->active_slaves, sif->sw_if_index); - /* First slave becomes active? */ - if ((vec_len (bif->active_slaves) == 1) && - (bif->mode == BOND_MODE_ACTIVE_BACKUP)) + sif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) { - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - } - else if ((vec_len (bif->active_slaves) > 1) - && (bif->mode == BOND_MODE_ACTIVE_BACKUP) - && bif->is_local_numa == 0) - { - if (vm->numa_node == hw->numa_node) - { - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, 0); - if (p == sif->sw_if_index) - break; - - vec_del1 (bif->active_slaves, 0); - hash_unset (bif->active_slave_by_sw_if_index, p); - vec_add1 (bif->active_slaves, p); - hash_set (bif->active_slave_by_sw_if_index, p, p); - } - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - - } + if (vec_len (bif->active_slaves) == 1) + /* First slave becomes active? */ + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + else + bond_sort_slaves (bif); } } clib_spinlock_unlock_if_init (&bif->lockp); @@ -238,6 +243,8 @@ bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, slaveif->sw_if_index = sif->sw_if_index; slaveif->is_passive = sif->is_passive; slaveif->is_long_timeout = sif->is_long_timeout; + slaveif->is_local_numa = sif->is_local_numa; + slaveif->weight = sif->weight; } } *out_slaveifs = r_slaveifs; @@ -862,6 +869,14 @@ show_bond_details (vlib_main_t * vm) { vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnet_get_main (), *sw_if_index); + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) + { + slave_if_t *sif = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif) + vlib_cli_output (vm, " weight: %u, is_local_numa: %u, " + "sw_if_index: %u", sif->weight, + sif->is_local_numa, sif->sw_if_index); + } } vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves)); vec_foreach (sw_if_index, bif->slaves) @@ -910,6 +925,113 @@ VLIB_CLI_COMMAND (show_bond_command, static) = { }; /* *INDENT-ON* */ +void +bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args) +{ + slave_if_t *sif; + bond_if_t *bif; + vnet_main_t *vnm; + u32 old_weight; + + sif = bond_get_slave_by_sw_if_index (args->sw_if_index); + if (!sif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "Interface not enslaved"); + return; + } + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + if (!bif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "bond interface not found"); + return; + } + if (bif->mode != BOND_MODE_ACTIVE_BACKUP) + { + args->rv = VNET_API_ERROR_INVALID_ARGUMENT; + args->error = + clib_error_return (0, "Weight valid for active-backup only"); + return; + } + + old_weight = sif->weight; + sif->weight = args->weight; + vnm = vnet_get_main (); + /* + * No need to sort the list if the affected slave is not up (not in active + * slave set), active slave count is 1, or the current slave is already the + * primary slave and new weight > old weight. + */ + if (!vnet_sw_interface_is_up (vnm, sif->sw_if_index) || + (vec_len (bif->active_slaves) == 1) || + ((bif->active_slaves[0] == sif->sw_if_index) && + (sif->weight >= old_weight))) + return; + + bond_sort_slaves (bif); +} + +static clib_error_t * +bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bond_set_intf_weight_args_t args = { 0 }; + u32 sw_if_index = (u32) ~ 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + u8 weight_enter = 0; + u32 weight = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "weight %u", &weight)) + weight_enter = 1; + else + { + clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + break; + } + } + + unformat_free (line_input); + if (sw_if_index == (u32) ~ 0) + { + args.rv = VNET_API_ERROR_INVALID_INTERFACE; + clib_error_return (0, "Interface name is invalid!"); + } + if (weight_enter == 0) + { + args.rv = VNET_API_ERROR_INVALID_ARGUMENT; + clib_error_return (0, "weight missing"); + } + + args.sw_if_index = sw_if_index; + args.weight = weight; + bond_set_intf_weight (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = { + .path = "set interface bond", + .short_help = "set interface bond <interface> | sw_if_index <idx>" + " weight <value>", + .function = bond_set_intf_cmd, +}; +/* *INDENT-ON* */ + clib_error_t * bond_cli_init (vlib_main_t * vm) { diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 1ad19dec872..1479209369a 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -110,6 +110,15 @@ typedef struct clib_error_t *error; } bond_detach_slave_args_t; +typedef struct +{ + u32 sw_if_index; + u32 weight; + /* return */ + int rv; + clib_error_t *error; +} bond_set_intf_weight_args_t; + /** BOND interface details struct */ typedef struct { @@ -130,6 +139,8 @@ typedef struct u8 interface_name[64]; u8 is_passive; u8 is_long_timeout; + u8 is_local_numa; + u32 weight; u32 active_slaves; } slave_interface_details_t; @@ -159,11 +170,6 @@ typedef struct u8 mode; u8 lb; - /* This flag works for active-backup mode only - and marks if the working port is local numa. */ - u8 is_local_numa; - /* current working sw_if_index in active-bakeup mode. */ - u32 sw_if_index_working; /* the last slave index for the rr lb */ u32 lb_rr_last_index; @@ -239,6 +245,9 @@ typedef struct /* neighbor vlib hw_if_index */ u32 hw_if_index; + /* weight -- valid only for active backup */ + u32 weight; + /* actor does not initiate the protocol exchange */ u8 is_passive; @@ -336,6 +345,9 @@ typedef struct /* pdu sent */ u64 marker_pdu_sent; + + /* slave is numa node */ + u8 is_local_numa; } slave_if_t; typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif, @@ -398,6 +410,8 @@ void bond_disable_collecting_distributing (vlib_main_t * vm, void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif); u8 *format_bond_interface_name (u8 * s, va_list * args); +void bond_set_intf_weight (vlib_main_t * vm, + bond_set_intf_weight_args_t * args); void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args); int bond_delete_if (vlib_main_t * vm, u32 sw_if_index); void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args); |