aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/bonding
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/bonding')
-rw-r--r--src/vnet/bonding/bond.api20
-rw-r--r--src/vnet/bonding/bond_api.c22
-rw-r--r--src/vnet/bonding/cli.c274
-rw-r--r--src/vnet/bonding/node.h24
4 files changed, 258 insertions, 82 deletions
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api
index e699267ccb5..5d9a0563c1a 100644
--- a/src/vnet/bonding/bond.api
+++ b/src/vnet/bonding/bond.api
@@ -19,7 +19,7 @@
the bonding device driver
*/
-option version = "1.0.1";
+option version = "1.0.2";
/** \brief Initialize a new bond interface with the given paramters
@param client_index - opaque cookie to identify the sender
@@ -154,6 +154,8 @@ define sw_interface_slave_dump
@param interface_name - name of interface
@param is_passve - interface does not initiate the lacp protocol, remote must be active speaker
@param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
+ @param is_local_numa - the slave interface is local numa
+ @param weight - the weight for the slave interface (active-backup mode only)
*/
define sw_interface_slave_details
{
@@ -162,6 +164,22 @@ define sw_interface_slave_details
u8 interface_name[64];
u8 is_passive;
u8 is_long_timeout;
+ u8 is_local_numa;
+ u32 weight;
+};
+
+/** \brief Interface set bond weight
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - slave interface for which to set the weight
+ @param weight - weight value to be set for the slave interface
+*/
+autoreply define sw_interface_set_bond_weight
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 weight;
};
/*
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 8e1842367e5..74334b52bf2 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -47,6 +47,7 @@
_(BOND_CREATE, bond_create) \
_(BOND_DELETE, bond_delete) \
_(BOND_ENSLAVE, bond_enslave) \
+_(SW_INTERFACE_SET_BOND_WEIGHT, sw_interface_set_bond_weight) \
_(BOND_DETACH_SLAVE, bond_detach_slave) \
_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\
_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump)
@@ -117,6 +118,25 @@ vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp)
}
static void
+ vl_api_sw_interface_set_bond_weight_t_handler
+ (vl_api_sw_interface_set_bond_weight_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ bond_set_intf_weight_args_t _a, *ap = &_a;
+ vl_api_sw_interface_set_bond_weight_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (ap, 0, sizeof (*ap));
+
+ ap->sw_if_index = ntohl (mp->sw_if_index);
+ ap->weight = ntohl (mp->weight);
+
+ bond_set_intf_weight (vm, ap);
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY);
+}
+
+static void
vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
{
vlib_main_t *vm = vlib_get_main ();
@@ -200,6 +220,8 @@ bond_send_sw_interface_slave_details (vpe_api_main_t * am,
strlen ((const char *) slave_if->interface_name)));
mp->is_passive = slave_if->is_passive;
mp->is_long_timeout = slave_if->is_long_timeout;
+ mp->is_local_numa = slave_if->is_local_numa;
+ mp->weight = htonl (slave_if->weight);
mp->context = context;
vl_api_send_msg (reg, (u8 *) mp);
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index 4e0d30aa598..2acc670a33d 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -29,8 +29,6 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
bond_if_t *bif;
int i;
uword p;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw;
u8 switching_active = 0;
bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
@@ -40,12 +38,10 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
p = *vec_elt_at_index (bif->active_slaves, i);
if (p == sif->sw_if_index)
{
- if (sif->sw_if_index == bif->sw_if_index_working)
- {
- switching_active = 1;
- if (bif->mode == BOND_MODE_ACTIVE_BACKUP)
- bif->is_local_numa = 0;
- }
+ if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && (i == 0) &&
+ (vec_len (bif->active_slaves) > 1))
+ /* deleting the active slave for active-backup */
+ switching_active = 1;
vec_del1 (bif->active_slaves, i);
hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
if (sif->lacp_enabled && bif->numa_only)
@@ -64,37 +60,9 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
}
/* We get a new slave just becoming active */
- if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active)
- {
- if ((vec_len (bif->active_slaves) >= 1))
- {
- /* scan all slaves and try to find the first slave with local numa node. */
- vec_foreach_index (i, bif->active_slaves)
- {
- p = *vec_elt_at_index (bif->active_slaves, i);
- hw = vnet_get_sup_hw_interface (vnm, p);
- if (vm->numa_node == hw->numa_node)
- {
- bif->sw_if_index_working = p;
- bif->is_local_numa = 1;
- vlib_process_signal_event (bm->vlib_main,
- bond_process_node.index,
- BOND_SEND_GARP_NA,
- bif->hw_if_index);
- break;
- }
- }
- }
-
- /* No local numa node is found in the active slave set. Use the first slave */
- if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1))
- {
- p = *vec_elt_at_index (bif->active_slaves, 0);
- bif->sw_if_index_working = p;
- vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
- BOND_SEND_GARP_NA, bif->hw_if_index);
- }
- }
+ if (switching_active)
+ vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+ BOND_SEND_GARP_NA, bif->hw_if_index);
clib_spinlock_unlock_if_init (&bif->lockp);
if (bif->mode == BOND_MODE_LACP)
@@ -102,6 +70,71 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
[sif->sw_if_index], sif->actor.state);
}
+/*
+ * return 1 if s2 is preferred.
+ * return -1 if s1 is preferred.
+ */
+static int
+bond_slave_sort (void *a1, void *a2)
+{
+ u32 *s1 = a1;
+ u32 *s2 = a2;
+ slave_if_t *sif1 = bond_get_slave_by_sw_if_index (*s1);
+ slave_if_t *sif2 = bond_get_slave_by_sw_if_index (*s2);
+ bond_if_t *bif;
+
+ ASSERT (sif1);
+ ASSERT (sif2);
+ /*
+ * sort entries according to preference rules:
+ * 1. biggest weight
+ * 2. numa-node
+ * 3. current active slave (to prevent churning)
+ * 4. lowest sw_if_index (for deterministic behavior)
+ *
+ */
+ if (sif2->weight > sif1->weight)
+ return 1;
+ if (sif2->weight < sif1->weight)
+ return -1;
+ else
+ {
+ if (sif2->is_local_numa > sif1->is_local_numa)
+ return 1;
+ if (sif2->is_local_numa < sif1->is_local_numa)
+ return -1;
+ else
+ {
+ bif = bond_get_master_by_dev_instance (sif1->bif_dev_instance);
+ /* Favor the current active slave to avoid churning */
+ if (bif->active_slaves[0] == sif2->sw_if_index)
+ return 1;
+ if (bif->active_slaves[0] == sif1->sw_if_index)
+ return -1;
+ /* go for the tiebreaker as the last resort */
+ if (sif1->sw_if_index > sif2->sw_if_index)
+ return 1;
+ if (sif1->sw_if_index < sif2->sw_if_index)
+ return -1;
+ else
+ ASSERT (0);
+ }
+ }
+ return 0;
+}
+
+static void
+bond_sort_slaves (bond_if_t * bif)
+{
+ bond_main_t *bm = &bond_main;
+ u32 old_active = bif->active_slaves[0];
+
+ vec_sort_with_function (bif->active_slaves, bond_slave_sort);
+ if (old_active != bif->active_slaves[0])
+ vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+ BOND_SEND_GARP_NA, bif->hw_if_index);
+}
+
void
bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
{
@@ -109,8 +142,6 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
bond_main_t *bm = &bond_main;
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
- int i;
- uword p;
bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
clib_spinlock_lock_if_init (&bif->lockp);
@@ -127,43 +158,17 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
bif->n_numa_slaves++;
}
else
- {
- vec_add1 (bif->active_slaves, sif->sw_if_index);
- }
+ vec_add1 (bif->active_slaves, sif->sw_if_index);
- /* First slave becomes active? */
- if ((vec_len (bif->active_slaves) == 1) &&
- (bif->mode == BOND_MODE_ACTIVE_BACKUP))
+ sif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0;
+ if (bif->mode == BOND_MODE_ACTIVE_BACKUP)
{
- bif->sw_if_index_working = sif->sw_if_index;
- bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0;
- vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
- BOND_SEND_GARP_NA, bif->hw_if_index);
- }
- else if ((vec_len (bif->active_slaves) > 1)
- && (bif->mode == BOND_MODE_ACTIVE_BACKUP)
- && bif->is_local_numa == 0)
- {
- if (vm->numa_node == hw->numa_node)
- {
- vec_foreach_index (i, bif->active_slaves)
- {
- p = *vec_elt_at_index (bif->active_slaves, 0);
- if (p == sif->sw_if_index)
- break;
-
- vec_del1 (bif->active_slaves, 0);
- hash_unset (bif->active_slave_by_sw_if_index, p);
- vec_add1 (bif->active_slaves, p);
- hash_set (bif->active_slave_by_sw_if_index, p, p);
- }
- bif->sw_if_index_working = sif->sw_if_index;
- bif->is_local_numa = 1;
- vlib_process_signal_event (bm->vlib_main,
- bond_process_node.index,
- BOND_SEND_GARP_NA, bif->hw_if_index);
-
- }
+ if (vec_len (bif->active_slaves) == 1)
+ /* First slave becomes active? */
+ vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+ BOND_SEND_GARP_NA, bif->hw_if_index);
+ else
+ bond_sort_slaves (bif);
}
}
clib_spinlock_unlock_if_init (&bif->lockp);
@@ -238,6 +243,8 @@ bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs,
slaveif->sw_if_index = sif->sw_if_index;
slaveif->is_passive = sif->is_passive;
slaveif->is_long_timeout = sif->is_long_timeout;
+ slaveif->is_local_numa = sif->is_local_numa;
+ slaveif->weight = sif->weight;
}
}
*out_slaveifs = r_slaveifs;
@@ -862,6 +869,14 @@ show_bond_details (vlib_main_t * vm)
{
vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name,
vnet_get_main (), *sw_if_index);
+ if (bif->mode == BOND_MODE_ACTIVE_BACKUP)
+ {
+ slave_if_t *sif = bond_get_slave_by_sw_if_index (*sw_if_index);
+ if (sif)
+ vlib_cli_output (vm, " weight: %u, is_local_numa: %u, "
+ "sw_if_index: %u", sif->weight,
+ sif->is_local_numa, sif->sw_if_index);
+ }
}
vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves));
vec_foreach (sw_if_index, bif->slaves)
@@ -910,6 +925,113 @@ VLIB_CLI_COMMAND (show_bond_command, static) = {
};
/* *INDENT-ON* */
+void
+bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args)
+{
+ slave_if_t *sif;
+ bond_if_t *bif;
+ vnet_main_t *vnm;
+ u32 old_weight;
+
+ sif = bond_get_slave_by_sw_if_index (args->sw_if_index);
+ if (!sif)
+ {
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error = clib_error_return (0, "Interface not enslaved");
+ return;
+ }
+ bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+ if (!bif)
+ {
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error = clib_error_return (0, "bond interface not found");
+ return;
+ }
+ if (bif->mode != BOND_MODE_ACTIVE_BACKUP)
+ {
+ args->rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ args->error =
+ clib_error_return (0, "Weight valid for active-backup only");
+ return;
+ }
+
+ old_weight = sif->weight;
+ sif->weight = args->weight;
+ vnm = vnet_get_main ();
+ /*
+ * No need to sort the list if the affected slave is not up (not in active
+ * slave set), active slave count is 1, or the current slave is already the
+ * primary slave and new weight > old weight.
+ */
+ if (!vnet_sw_interface_is_up (vnm, sif->sw_if_index) ||
+ (vec_len (bif->active_slaves) == 1) ||
+ ((bif->active_slaves[0] == sif->sw_if_index) &&
+ (sif->weight >= old_weight)))
+ return;
+
+ bond_sort_slaves (bif);
+}
+
+static clib_error_t *
+bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bond_set_intf_weight_args_t args = { 0 };
+ u32 sw_if_index = (u32) ~ 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ u8 weight_enter = 0;
+ u32 weight = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "Missing required arguments.");
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "weight %u", &weight))
+ weight_enter = 1;
+ else
+ {
+ clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+ if (sw_if_index == (u32) ~ 0)
+ {
+ args.rv = VNET_API_ERROR_INVALID_INTERFACE;
+ clib_error_return (0, "Interface name is invalid!");
+ }
+ if (weight_enter == 0)
+ {
+ args.rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ clib_error_return (0, "weight missing");
+ }
+
+ args.sw_if_index = sw_if_index;
+ args.weight = weight;
+ bond_set_intf_weight (vm, &args);
+
+ return args.error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = {
+ .path = "set interface bond",
+ .short_help = "set interface bond <interface> | sw_if_index <idx>"
+ " weight <value>",
+ .function = bond_set_intf_cmd,
+};
+/* *INDENT-ON* */
+
clib_error_t *
bond_cli_init (vlib_main_t * vm)
{
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 1ad19dec872..1479209369a 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -110,6 +110,15 @@ typedef struct
clib_error_t *error;
} bond_detach_slave_args_t;
+typedef struct
+{
+ u32 sw_if_index;
+ u32 weight;
+ /* return */
+ int rv;
+ clib_error_t *error;
+} bond_set_intf_weight_args_t;
+
/** BOND interface details struct */
typedef struct
{
@@ -130,6 +139,8 @@ typedef struct
u8 interface_name[64];
u8 is_passive;
u8 is_long_timeout;
+ u8 is_local_numa;
+ u32 weight;
u32 active_slaves;
} slave_interface_details_t;
@@ -159,11 +170,6 @@ typedef struct
u8 mode;
u8 lb;
- /* This flag works for active-backup mode only
- and marks if the working port is local numa. */
- u8 is_local_numa;
- /* current working sw_if_index in active-bakeup mode. */
- u32 sw_if_index_working;
/* the last slave index for the rr lb */
u32 lb_rr_last_index;
@@ -239,6 +245,9 @@ typedef struct
/* neighbor vlib hw_if_index */
u32 hw_if_index;
+ /* weight -- valid only for active backup */
+ u32 weight;
+
/* actor does not initiate the protocol exchange */
u8 is_passive;
@@ -336,6 +345,9 @@ typedef struct
/* pdu sent */
u64 marker_pdu_sent;
+
+ /* slave is numa node */
+ u8 is_local_numa;
} slave_if_t;
typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
@@ -398,6 +410,8 @@ void bond_disable_collecting_distributing (vlib_main_t * vm,
void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
u8 *format_bond_interface_name (u8 * s, va_list * args);
+void bond_set_intf_weight (vlib_main_t * vm,
+ bond_set_intf_weight_args_t * args);
void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);