diff options
Diffstat (limited to 'src')
35 files changed, 7338 insertions, 1 deletions
diff --git a/src/configure.ac b/src/configure.ac index c4554231308..d0067c079eb 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -222,6 +222,7 @@ PLUGIN_ENABLED(ioam) PLUGIN_ENABLED(ixge) PLUGIN_ENABLED(kubeproxy) PLUGIN_ENABLED(l2e) +PLUGIN_ENABLED(lacp) PLUGIN_ENABLED(lb) PLUGIN_ENABLED(marvell) PLUGIN_ENABLED(memif) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 37b2e259056..03a39dfcd7a 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -71,6 +71,10 @@ if ENABLE_KUBEPROXY_PLUGIN include kubeproxy.am endif +if ENABLE_LACP_PLUGIN +include lacp.am +endif + if ENABLE_LB_PLUGIN include lb.am endif diff --git a/src/plugins/lacp.am b/src/plugins/lacp.am new file mode 100644 index 00000000000..c7e571d95fb --- /dev/null +++ b/src/plugins/lacp.am @@ -0,0 +1,47 @@ +# Copyright (c) 2017 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +vppplugins_LTLIBRARIES += lacp_plugin.la +vppapitestplugins_LTLIBRARIES += lacp_test_plugin.la + +lacp_plugin_la_LIBADD = +lacp_plugin_la_SOURCES = lacp/lacp.c \ + lacp/lacp_api.c \ + lacp/selection.c \ + lacp/rx_machine.c \ + lacp/tx_machine.c \ + lacp/mux_machine.c \ + lacp/ptx_machine.c \ + lacp/cli.c \ + lacp/input.c \ + lacp/node.c + +lacp_test_plugin_la_SOURCES = \ + lacp/lacp_test.c + +noinst_HEADERS += lacp/protocol.h \ + lacp/machine.h \ + lacp/rx_machine.h \ + lacp/tx_machine.h \ + lacp/mux_machine.h \ + lacp/ptx_machine.h \ + lacp/node.h + +nobase_apiinclude_HEADERS += \ + lacp/lacp_all_api_h.h \ + lacp/lacp_msg_enum.h \ + lacp/lacp.api.h + +API_FILES += lacp/lacp.api + +# vi:syntax=automake diff --git a/src/plugins/lacp/cli.c b/src/plugins/lacp/cli.c new file mode 100644 index 00000000000..10627774f56 --- /dev/null +++ b/src/plugins/lacp/cli.c @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#define _GNU_SOURCE
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Collect one lacp_interface_details_t per LACP-enabled slave.
+ *
+ * Walks bm->neighbors, skipping entries whose port_enabled or lacp_enabled
+ * flag is zero, and appends a zero-filled record for every remaining slave.
+ * Interface names are truncated so that the last byte of the destination
+ * array stays zero.  Actor/partner fields are copied exactly as they are
+ * stored in the slave (no byte swapping is done here).
+ *
+ * out_lacpifs receives the resulting vector (NULL when nothing matched).
+ * Always returns 0.
+ */
+int
+lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  slave_if_t *sif;
+  bond_if_t *bif;
+  vnet_hw_interface_t *hi;
+  lacp_interface_details_t *r_lacpifs = NULL;
+  lacp_interface_details_t *lacpif = NULL;
+
+  /* *INDENT-OFF* */
+  pool_foreach (sif, bm->neighbors,
+    if ((sif->port_enabled == 0) || (sif->lacp_enabled == 0))
+      continue;
+    vec_add2(r_lacpifs, lacpif, 1);
+    memset (lacpif, 0, sizeof (*lacpif));
+    lacpif->sw_if_index = sif->sw_if_index;
+    hi = vnet_get_hw_interface (vnm, sif->hw_if_index);
+    clib_memcpy(lacpif->interface_name, hi->name,
+                MIN (ARRAY_LEN (lacpif->interface_name) - 1,
+                     strlen ((const char *) hi->name)));
+    bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+    hi = vnet_get_hw_interface (vnm, bif->hw_if_index);
+    clib_memcpy(lacpif->bond_interface_name, hi->name,
+                MIN (ARRAY_LEN (lacpif->bond_interface_name) - 1,
+                     strlen ((const char *) hi->name)));
+    clib_memcpy (lacpif->actor_system, sif->actor.system, 6);
+    lacpif->actor_system_priority = sif->actor.system_priority;
+    lacpif->actor_key = sif->actor.key;
+    lacpif->actor_port_priority = sif->actor.port_priority;
+    lacpif->actor_port_number = sif->actor.port_number;
+    lacpif->actor_state = sif->actor.state;
+    clib_memcpy (lacpif->partner_system, sif->partner.system, 6);
+    lacpif->partner_system_priority = sif->partner.system_priority;
+    lacpif->partner_key = sif->partner.key;
+    lacpif->partner_port_priority = sif->partner.port_priority;
+    lacpif->partner_port_number = sif->partner.port_number;
+    lacpif->partner_state = sif->partner.state;
+    lacpif->rx_state = sif->rx_state;
+    lacpif->tx_state = sif->tx_state;
+    lacpif->ptx_state = sif->ptx_state;
+    lacpif->mux_state = sif->mux_state;
+  );
+  /* *INDENT-ON* */
+
+  *out_lacpifs = r_lacpifs;
+
+  return 0;
+}
+
+/*
+ * Print the summary view of "show lacp".
+ *
+ * sw_if_indices is a vector of slave sw_if_index values; a NULL vector
+ * prints nothing.  Entries that are not enslaved, or whose port_enabled or
+ * lacp_enabled flag is zero, are skipped.  For every remaining slave three
+ * lines are printed: the actor/partner state bits (bit 7 down to bit 0,
+ * matching the exp/def/dis/col/syn/agg/tim/act column header), the LAG ID,
+ * and the current state of the four state machines.
+ */
+static void
+show_lacp (vlib_main_t * vm, u32 * sw_if_indices)
+{
+  int i;
+  slave_if_t *sif;
+  bond_if_t *bif;
+
+  if (!sw_if_indices)
+    return;
+
+  vlib_cli_output (vm, "%-55s %-32s %-32s", " ", "actor state",
+                   "partner state");
+  vlib_cli_output (vm, "%-25s %-12s %-16s %-31s %-31s", "interface name",
+                   "sw_if_index", "bond interface",
+                   "exp/def/dis/col/syn/agg/tim/act",
+                   "exp/def/dis/col/syn/agg/tim/act");
+
+  for (i = 0; i < vec_len (sw_if_indices); i++)
+    {
+      sif = bond_get_slave_by_sw_if_index (sw_if_indices[i]);
+      if (!sif || (sif->port_enabled == 0) || (sif->lacp_enabled == 0))
+        continue;
+      bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+      vlib_cli_output (vm,
+                       "%-25U %-12d %-16U %3x %3x %3x %3x %3x %3x %3x %3x "
+                       "%4x %3x %3x %3x %3x %3x %3x %3x",
+                       format_vnet_sw_if_index_name, vnet_get_main (),
+                       sif->sw_if_index, sif->sw_if_index,
+                       format_vnet_sw_if_index_name, vnet_get_main (),
+                       bif->sw_if_index, lacp_bit_test (sif->actor.state, 7),
+                       lacp_bit_test (sif->actor.state, 6),
+                       lacp_bit_test (sif->actor.state, 5),
+                       lacp_bit_test (sif->actor.state, 4),
+                       lacp_bit_test (sif->actor.state, 3),
+                       lacp_bit_test (sif->actor.state, 2),
+                       lacp_bit_test (sif->actor.state, 1),
+                       lacp_bit_test (sif->actor.state, 0),
+                       lacp_bit_test (sif->partner.state, 7),
+                       lacp_bit_test (sif->partner.state, 6),
+                       lacp_bit_test (sif->partner.state, 5),
+                       lacp_bit_test (sif->partner.state, 4),
+                       lacp_bit_test (sif->partner.state, 3),
+                       lacp_bit_test (sif->partner.state, 2),
+                       lacp_bit_test (sif->partner.state, 1),
+                       lacp_bit_test (sif->partner.state, 0));
+      /* 16-bit identifiers are passed through ntohs () before printing */
+      vlib_cli_output (vm,
+                       " LAG ID: "
+                       "[(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x), "
+                       "(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x)]",
+                       ntohs (sif->actor.system_priority),
+                       sif->actor.system[0], sif->actor.system[1],
+                       sif->actor.system[2], sif->actor.system[3],
+                       sif->actor.system[4], sif->actor.system[5],
+                       ntohs (sif->actor.key),
+                       ntohs (sif->actor.port_priority),
+                       ntohs (sif->actor.port_number),
+                       ntohs (sif->partner.system_priority),
+                       sif->partner.system[0], sif->partner.system[1],
+                       sif->partner.system[2], sif->partner.system[3],
+                       sif->partner.system[4], sif->partner.system[5],
+                       ntohs (sif->partner.key),
+                       ntohs (sif->partner.port_priority),
+                       ntohs (sif->partner.port_number));
+      vlib_cli_output (vm,
+                       " RX-state: %U, TX-state: %U, "
+                       "MUX-state: %U, PTX-state: %U",
+                       format_rx_sm_state, sif->rx_state, format_tx_sm_state,
+                       sif->tx_state, format_mux_sm_state, sif->mux_state,
+                       format_ptx_sm_state, sif->ptx_state);
+    }
+}
+
+/*
+ * Print the verbose view of "show lacp ... details".
+ *
+ * Same filtering as show_lacp ().  For each slave this prints the debug and
+ * selection flags, the actor and partner parameters (with every set state
+ * bit named via lacp_state_array), the time remaining on each running
+ * timer relative to vlib_time_now (), and the state machine states.
+ */
+static void
+show_lacp_details (vlib_main_t * vm, u32 * sw_if_indices)
+{
+  slave_if_t *sif;
+  lacp_state_struct *state_entry;
+  int i;
+  f64 now;
+
+  if (!sw_if_indices)
+    return;
+
+  now = vlib_time_now (vm);
+  for (i = 0; i < vec_len (sw_if_indices); i++)
+    {
+      sif = bond_get_slave_by_sw_if_index (sw_if_indices[i]);
+      if (!sif || (sif->port_enabled == 0) || (sif->lacp_enabled == 0))
+        continue;
+      vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name,
+                       vnet_get_main (), sif->sw_if_index);
+      vlib_cli_output (vm, " debug: %d", sif->debug);
+      vlib_cli_output (vm, " loopback port: %d", sif->loopback_port);
+      vlib_cli_output (vm, " port moved: %d", sif->port_moved);
+      vlib_cli_output (vm, " ready_n: %d", sif->ready_n);
+      vlib_cli_output (vm, " ready: %d", sif->ready);
+      vlib_cli_output (vm, " Actor");
+      vlib_cli_output (vm, " system: %U",
+                       format_ethernet_address, sif->actor.system);
+      vlib_cli_output (vm, " system priority: %u",
+                       ntohs (sif->actor.system_priority));
+      vlib_cli_output (vm, " key: %u", ntohs (sif->actor.key));
+      vlib_cli_output (vm, " port priority: %u",
+                       ntohs (sif->actor.port_priority));
+      vlib_cli_output (vm, " port number: %u",
+                       ntohs (sif->actor.port_number));
+      vlib_cli_output (vm, " state: 0x%x", sif->actor.state);
+
+      /* lacp_state_array is walked until an entry with a NULL str */
+      state_entry = (lacp_state_struct *) & lacp_state_array;
+      while (state_entry->str)
+        {
+          if (sif->actor.state & (1 << state_entry->bit))
+            vlib_cli_output (vm, " %s (%d)", state_entry->str,
+                             state_entry->bit);
+          state_entry++;
+        }
+
+      vlib_cli_output (vm, " Partner");
+      vlib_cli_output (vm, " system: %U",
+                       format_ethernet_address, sif->partner.system);
+      vlib_cli_output (vm, " system priority: %u",
+                       ntohs (sif->partner.system_priority));
+      vlib_cli_output (vm, " key: %u", ntohs (sif->partner.key));
+      vlib_cli_output (vm, " port priority: %u",
+                       ntohs (sif->partner.port_priority));
+      vlib_cli_output (vm, " port number: %u",
+                       ntohs (sif->partner.port_number));
+      vlib_cli_output (vm, " state: 0x%x", sif->partner.state);
+
+      state_entry = (lacp_state_struct *) & lacp_state_array;
+      while (state_entry->str)
+        {
+          if (sif->partner.state & (1 << state_entry->bit))
+            vlib_cli_output (vm, " %s (%d)", state_entry->str,
+                             state_entry->bit);
+          state_entry++;
+        }
+
+      if (!lacp_timer_is_running (sif->wait_while_timer))
+        vlib_cli_output (vm, " wait while timer: not running");
+      else
+        vlib_cli_output (vm, " wait while timer: %=10.2f seconds",
+                         sif->wait_while_timer - now);
+      if (!lacp_timer_is_running (sif->current_while_timer))
+        vlib_cli_output (vm, " current while timer: not running");
+      else
+        vlib_cli_output (vm, " current while timer: %=10.2f seconds",
+                         sif->current_while_timer - now);
+      if (!lacp_timer_is_running (sif->periodic_timer))
+        vlib_cli_output (vm, " periodic timer: not running");
+      else
+        vlib_cli_output (vm, " periodic timer: %=10.2f seconds",
+                         sif->periodic_timer - now);
+      vlib_cli_output (vm, " RX-state: %U", format_rx_sm_state,
+                       sif->rx_state);
+      vlib_cli_output (vm, " TX-state: %U", format_tx_sm_state,
+                       sif->tx_state);
+      vlib_cli_output (vm, " MUX-state: %U", format_mux_sm_state,
+                       sif->mux_state);
+      vlib_cli_output (vm, " PTX-state: %U", format_ptx_sm_state,
+                       sif->ptx_state);
+      vlib_cli_output (vm, "\n");
+    }
+}
+
+/*
+ * CLI handler for "show lacp [<interface>] [details]".
+ *
+ * Every interface named on the command line must be an enslaved port,
+ * otherwise "interface is not enslaved" is returned.  When no interface is
+ * given, all entries of bm->neighbors are shown.  The sw_if_index vector
+ * built here is freed on every exit path.
+ */
+static clib_error_t *
+show_lacp_fn (vlib_main_t * vm, unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = &vnet_main;
+  slave_if_t *sif;
+  clib_error_t *error = 0;
+  u8 details = 0;
+  u32 hw_if_index, *sw_if_indices = 0;
+  vnet_hw_interface_t *hi;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+          (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+        {
+          /*
+           * The parser yields a *hardware* interface index.  Translate it
+           * through the hw interface record; it must not be used to index
+           * the sw_interfaces pool directly.
+           */
+          hi = vnet_get_hw_interface (vnm, hw_if_index);
+          sif = bond_get_slave_by_sw_if_index (hi->sw_if_index);
+          if (!sif)
+            {
+              error = clib_error_return (0, "interface is not enslaved");
+              goto done;
+            }
+          vec_add1 (sw_if_indices, sif->sw_if_index);
+        }
+      else if (unformat (input, "details"))
+        details = 1;
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  /* No interface named: show every known slave */
+  if (vec_len (sw_if_indices) == 0)
+    {
+      pool_foreach (sif, bm->neighbors,
+          vec_add1 (sw_if_indices, sif->sw_if_index);
+      );
+    }
+
+  if (details)
+    show_lacp_details (vm, sw_if_indices);
+  else
+    show_lacp (vm, sw_if_indices);
+
+done:
+  vec_free (sw_if_indices);
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_lacp_command, static) = {
+  .path = "show lacp",
+  .short_help = "show lacp [<interface>] [details]",
+  .function = show_lacp_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "debug lacp <interface> <on | off>".
+ *
+ * With an interface the per-slave debug flag is set; without one the
+ * plugin-wide lm->debug flag is set.  Nothing may follow the on/off
+ * keyword.  line_input is released on every path after it has been
+ * acquired.
+ */
+static clib_error_t *
+debug_lacp_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  lacp_main_t *lm = &lacp_main;
+  u8 onoff = 0;
+  u8 input_found = 0;
+  u32 hw_if_index = ~0;
+  slave_if_t *sif;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "missing argument");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      /*
+       * An interface name is optional.  Note there is no "else" after
+       * this statement: whether or not a name was consumed, the same pass
+       * continues with the on/off keyword checks below.
+       */
+      if (unformat (line_input, "%U",
+                    unformat_vnet_hw_interface, vnm, &hw_if_index))
+        ;
+      if (input_found)
+        {
+          /* anything after on/off is rejected */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+      else if (unformat (line_input, "on"))
+        {
+          input_found = 1;
+          onoff = 1;
+        }
+      else if (unformat (line_input, "off"))
+        {
+          input_found = 1;
+          onoff = 0;
+        }
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (!input_found)
+    {
+      error = clib_error_return (0, "must specify on or off");
+      goto done;
+    }
+
+  if (hw_if_index != ~0)
+    {
+      /* hw_if_index -> sw_if_index via the hw interface record */
+      hi = vnet_get_hw_interface (vnm, hw_if_index);
+      sif = bond_get_slave_by_sw_if_index (hi->sw_if_index);
+      if (!sif)
+        {
+          error = clib_error_return (0, "Please enslave the interface first");
+          goto done;
+        }
+      sif->debug = onoff;
+    }
+  else
+    lm->debug = onoff;
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (debug_lacp_command, static) = {
+  .path = "debug lacp",
+  .short_help = "debug lacp <interface> <on | off>",
+  .function = debug_lacp_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function: cache the vlib and vnet main pointers in lacp_main.
+ * Always returns 0.
+ */
+clib_error_t *
+lacp_cli_init (vlib_main_t * vm)
+{
+  lacp_main_t *lm = &lacp_main;
+
+  lm->vlib_main = vm;
+  lm->vnet_main = vnet_get_main ();
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lacp_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/input.c b/src/plugins/lacp/input.c
new file mode 100644
index 00000000000..45db3b8455b
--- /dev/null
+++ b/src/plugins/lacp/input.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Validate the LACPDU held in sif->last_rx_pkt and feed it to the RX
+ * state machine.
+ *
+ * Returns LACP_ERROR_UNSUPPORTED for a subtype other than LACP_SUBTYPE,
+ * LACP_ERROR_BAD_TLV when any TLV length differs from the expected
+ * structure size, and LACP_ERROR_NONE after dispatching
+ * LACP_RX_EVENT_PDU_RECEIVED.
+ */
+static int
+lacp_packet_scan (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+
+  if (lacpdu->subtype != LACP_SUBTYPE)
+    return LACP_ERROR_UNSUPPORTED;
+
+  /*
+   * According to the spec, no checking on the version number and tlv types.
+   * But we may check the tlv lengths.
+   */
+  if ((lacpdu->actor.tlv_length != sizeof (lacp_actor_partner_t)) ||
+      (lacpdu->partner.tlv_length != sizeof (lacp_actor_partner_t)) ||
+      (lacpdu->collector.tlv_length != sizeof (lacp_collector_t)) ||
+      (lacpdu->terminator.tlv_length != 0))
+    return (LACP_ERROR_BAD_TLV);
+
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state);
+
+  return LACP_ERROR_NONE;
+}
+
+/*
+ * Fill a marker response: echo the marker information of the last
+ * received marker PDU and retag it as MARKER_RESPONSE_INFORMATION.
+ */
+static void
+marker_fill_pdu (marker_pdu_t * marker, slave_if_t * sif)
+{
+  marker_pdu_t *pkt = (marker_pdu_t *) sif->last_marker_pkt;
+
+  marker->marker_info = pkt->marker_info;
+  marker->marker_info.tlv_type = MARKER_RESPONSE_INFORMATION;
+}
+
+/*
+ * Fill a marker request from the slave's actor port number and system
+ * address.  The slave's marker_tx_id is used as the transaction id and is
+ * incremented afterwards.
+ */
+void
+marker_fill_request_pdu (marker_pdu_t * marker, slave_if_t * sif)
+{
+  marker->marker_info.tlv_type = MARKER_INFORMATION;
+  marker->marker_info.requester_port = sif->actor.port_number;
+  clib_memcpy (marker->marker_info.requester_system, sif->actor.system, 6);
+  marker->marker_info.requester_transaction_id = sif->marker_tx_id;
+  sif->marker_tx_id++;
+}
+
+/*
+ * Build a marker response from the marker packet template and hand it to
+ * the output node of the slave's hardware interface.  Silently returns
+ * when no packet could be obtained from the template.
+ */
+static void
+send_ethernet_marker_response_pdu (slave_if_t * sif)
+{
+  lacp_main_t *lm = &lacp_main;
+  u32 *to_next;
+  ethernet_marker_pdu_t *h0;
+  vnet_hw_interface_t *hw;
+  u32 bi0;
+  vlib_buffer_t *b0;
+  vlib_frame_t *f;
+  vlib_main_t *vm = lm->vlib_main;
+  vnet_main_t *vnm = lm->vnet_main;
+
+  /*
+   * see lacp_periodic_init() to understand what's already painted
+   * into the buffer by the packet template mechanism
+   */
+  h0 = vlib_packet_template_get_packet
+    (vm, &lm->marker_packet_templates[sif->packet_template_index], &bi0);
+
+  if (!h0)
+    return;
+
+  /* Add the interface's ethernet source address */
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+
+  clib_memcpy (h0->ethernet.src_address, hw->hw_address,
+               vec_len (hw->hw_address));
+
+  marker_fill_pdu (&h0->marker, sif);
+
+  /* Set the outbound packet length */
+  b0 = vlib_get_buffer (vm, bi0);
+  b0->current_length = sizeof (ethernet_marker_pdu_t);
+  b0->current_data = 0;
+  b0->total_length_not_including_first_buffer = 0;
+
+  /* And the outbound interface */
+  vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index;
+
+  /* And output the packet on the correct interface */
+  f = vlib_get_frame_to_node (vm, hw->output_node_index);
+
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi0;
+  f->n_vectors = 1;
+
+  vlib_put_frame_to_node (vm, hw->output_node_index, f);
+}
+
+/*
+ * Validate the marker PDU held in sif->last_marker_pkt and answer it.
+ *
+ * Returns LACP_ERROR_BAD_TLV on unexpected TLV lengths, LACP_ERROR_NONE
+ * otherwise.  Marker Response PDUs are accepted but never answered.
+ */
+static int
+handle_marker_protocol (vlib_main_t * vm, slave_if_t * sif)
+{
+  marker_pdu_t *marker = (marker_pdu_t *) sif->last_marker_pkt;
+
+  /*
+   * According to the spec, no checking on the version number and tlv types.
+   * But we may check the tlv lengths.
+   */
+  if ((marker->marker_info.tlv_length != sizeof (marker_information_t)) ||
+      (marker->terminator.tlv_length != 0))
+    return (LACP_ERROR_BAD_TLV);
+
+  /*
+   * A responder only answers marker requests.  Replying to a Marker
+   * Response would make two such responders echo each other forever,
+   * because the reply built by marker_fill_pdu () is itself tagged
+   * MARKER_RESPONSE_INFORMATION.
+   */
+  if (marker->marker_info.tlv_type == MARKER_RESPONSE_INFORMATION)
+    return LACP_ERROR_NONE;
+
+  send_ethernet_marker_response_pdu (sif);
+
+  return LACP_ERROR_NONE;
+}
+
+/*
+ * lacp input routine
+ *
+ * b0/bi0 is a slow-protocols frame whose current data points at the PDU
+ * subtype byte.  Returns:
+ *   LACP_ERROR_DISABLED   - RX interface is not a slave in LACP mode
+ *   LACP_ERROR_TOO_SMALL  - payload shorter than sizeof (lacp_pdu_t)
+ *   LACP_ERROR_CACHE_HIT  - LACPDU identical to the previous one on an
+ *                           active slave; only the current-while timer is
+ *                           restarted
+ *   otherwise the result of handle_marker_protocol () or
+ *   lacp_packet_scan ().
+ */
+lacp_error_t
+lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
+{
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif;
+  uword nbytes;
+  lacp_error_t e;
+  marker_pdu_t *marker;
+  uword last_packet_signature;
+  bond_if_t *bif;
+
+  sif =
+    bond_get_slave_by_sw_if_index (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+  if ((sif == 0) || (sif->mode != BOND_MODE_LACP))
+    {
+      return LACP_ERROR_DISABLED;
+    }
+
+  /*
+   * Reject runts before looking at the payload.  This uses the same
+   * threshold and error code as the post-copy checks below, but it also
+   * keeps an empty payload from turning "length - 1" into a huge
+   * vec_validate () index and from reading a subtype byte that is not
+   * part of the packet.
+   */
+  if (vlib_buffer_length_in_chain (vm, b0) < sizeof (lacp_pdu_t))
+    return LACP_ERROR_TOO_SMALL;
+
+  /* Handle marker protocol */
+  marker = (marker_pdu_t *) (b0->data + b0->current_data);
+  if (marker->subtype == MARKER_SUBTYPE)
+    {
+      if (sif->last_marker_pkt)
+        _vec_len (sif->last_marker_pkt) = 0;
+      vec_validate (sif->last_marker_pkt,
+                    vlib_buffer_length_in_chain (vm, b0) - 1);
+      nbytes = vlib_buffer_contents (vm, bi0, sif->last_marker_pkt);
+      ASSERT (nbytes <= vec_len (sif->last_marker_pkt));
+      /* defensive: kept from the original post-copy validation */
+      if (nbytes < sizeof (lacp_pdu_t))
+        return LACP_ERROR_TOO_SMALL;
+      return (handle_marker_protocol (vm, sif));
+    }
+
+  /*
+   * typical clib idiom. Don't repeatedly allocate and free
+   * the per-neighbor rx buffer. Reset its apparent length to zero
+   * and reuse it.
+   */
+  if (sif->last_rx_pkt)
+    _vec_len (sif->last_rx_pkt) = 0;
+
+  /*
+   * Make sure the per-neighbor rx buffer is big enough to hold
+   * the data we're about to copy
+   */
+  vec_validate (sif->last_rx_pkt, vlib_buffer_length_in_chain (vm, b0) - 1);
+
+  /*
+   * Coalesce / copy the buffer chain into the per-neighbor
+   * rx buffer
+   */
+  nbytes = vlib_buffer_contents (vm, bi0, sif->last_rx_pkt);
+  ASSERT (nbytes <= vec_len (sif->last_rx_pkt));
+
+  /* defensive: kept from the original post-copy validation */
+  if (nbytes < sizeof (lacp_pdu_t))
+    {
+      return LACP_ERROR_TOO_SMALL;
+    }
+
+  last_packet_signature =
+    hash_memory (sif->last_rx_pkt, vec_len (sif->last_rx_pkt), 0xd00b);
+
+  bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+  if (sif->last_packet_signature_valid &&
+      (sif->last_packet_signature == last_packet_signature) &&
+      hash_get (bif->active_slave_by_sw_if_index, sif->sw_if_index))
+    {
+      /* unchanged PDU on an active slave: just refresh the timer */
+      lacp_start_current_while_timer (lm->vlib_main, sif,
+                                      sif->ttl_in_seconds);
+      e = LACP_ERROR_CACHE_HIT;
+    }
+  else
+    {
+      /* Actually scan the packet */
+      e = lacp_packet_scan (vm, sif);
+      sif->last_packet_signature_valid = 1;
+      sif->last_packet_signature = last_packet_signature;
+    }
+
+  if (sif->last_rx_pkt)
+    _vec_len (sif->last_rx_pkt) = 0;
+
+  return e;
+}
+
+/*
+ * Init function: make sure lacp_periodic_init has run and propagate its
+ * error, if any.
+ */
+static clib_error_t *
+lacp_init (vlib_main_t * vm)
+{
+  clib_error_t *error;
+
+  if ((error = vlib_call_init_function (vm, lacp_periodic_init)))
+    return error;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lacp_init);
+
+/*
+ * packet trace format function, very similar to
+ * lacp_packet_scan except that we call the per TLV format
+ * functions instead of the per TLV processing functions
+ *
+ * The decoded section is only produced when the traced length is at least
+ * sizeof (lacp_pdu_t); the hex dump that follows covers at most
+ * sizeof (lacp_pdu_t) bytes, 16 bytes per row in 2-byte groups.
+ */
+u8 *
+lacp_input_format_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  lacp_input_trace_t *t = va_arg (*args, lacp_input_trace_t *);
+  lacp_pdu_t *lacpdu = &t->pkt.lacpdu;
+  marker_pdu_t *marker = &t->pkt.marker;
+  int i, len;
+  u8 *p;
+  lacp_state_struct *state_entry;
+
+  s = format (s, "Length: %d\n", t->len);
+  if (t->len >= sizeof (lacp_pdu_t))
+    {
+      switch (lacpdu->subtype)
+        {
+        case MARKER_SUBTYPE:
+          if (marker->version_number == MARKER_PROTOCOL_VERSION)
+            s = format (s, " Markerv1\n");
+          else
+            s = format (s, " Subtype %u, Version %u\n", marker->subtype,
+                        marker->version_number);
+          s = format (s, " Marker Information TLV: type %u\n",
+                      marker->marker_info.tlv_type);
+          s = format (s, " Marker Information TLV: length %u\n",
+                      marker->marker_info.tlv_length);
+          s = format (s, " Requester port: %u\n",
+                      marker->marker_info.requester_port);
+          s = format (s, " Requester system: %U\n", format_ethernet_address,
+                      marker->marker_info.requester_system);
+          s = format (s, " Requester transaction ID: %u\n",
+                      marker->marker_info.requester_transaction_id);
+          break;
+
+        case LACP_SUBTYPE:
+          if (lacpdu->version_number == LACP_ACTOR_LACP_VERSION)
+            s = format (s, " LACPv1\n");
+          else
+            s = format (s, " Subtype %u, Version %u\n", lacpdu->subtype,
+                        lacpdu->version_number);
+          s = format (s, " Actor Information TLV: length %u\n",
+                      lacpdu->actor.tlv_length);
+          s = format (s, " System %U\n", format_ethernet_address,
+                      lacpdu->actor.port_info.system);
+          s = format (s, " System priority %u\n",
+                      ntohs (lacpdu->actor.port_info.system_priority));
+          s = format (s, " Key %u\n", ntohs (lacpdu->actor.port_info.key));
+          s = format (s, " Port priority %u\n",
+                      ntohs (lacpdu->actor.port_info.port_priority));
+          s = format (s, " Port number %u\n",
+                      ntohs (lacpdu->actor.port_info.port_number));
+          s = format (s, " State 0x%x\n", lacpdu->actor.port_info.state);
+          /* name every set state bit; the table ends at a NULL str */
+          state_entry = (lacp_state_struct *) & lacp_state_array;
+          while (state_entry->str)
+            {
+              if (lacpdu->actor.port_info.state & (1 << state_entry->bit))
+                s = format (s, " %s (%d)\n", state_entry->str,
+                            state_entry->bit);
+              state_entry++;
+            }
+
+          s = format (s, " Partner Information TLV: length %u\n",
+                      lacpdu->partner.tlv_length);
+          s = format (s, " System %U\n", format_ethernet_address,
+                      lacpdu->partner.port_info.system);
+          s = format (s, " System priority %u\n",
+                      ntohs (lacpdu->partner.port_info.system_priority));
+          s =
+            format (s, " Key %u\n", ntohs (lacpdu->partner.port_info.key));
+          s =
+            format (s, " Port priority %u\n",
+                    ntohs (lacpdu->partner.port_info.port_priority));
+          s =
+            format (s, " Port number %u\n",
+                    ntohs (lacpdu->partner.port_info.port_number));
+          s = format (s, " State 0x%x\n", lacpdu->partner.port_info.state);
+          state_entry = (lacp_state_struct *) & lacp_state_array;
+          while (state_entry->str)
+            {
+              if (lacpdu->partner.port_info.state & (1 << state_entry->bit))
+                s = format (s, " %s (%d)\n", state_entry->str,
+                            state_entry->bit);
+              state_entry++;
+            }
+          break;
+
+        default:
+          break;
+        }
+    }
+
+  /* hex dump, clamped to the size of one PDU */
+  if (t->len > sizeof (lacp_pdu_t))
+    len = sizeof (lacp_pdu_t);
+  else
+    len = t->len;
+  p = (u8 *) lacpdu;
+  for (i = 0; i < len; i++)
+    {
+      if ((i % 16) == 0)
+        {
+          if (i)
+            s = format (s, "\n");
+          s = format (s, " 0x%04x: ", i);
+        }
+      if ((i % 2) == 0)
+        s = format (s, " ");
+      s = format (s, "%02x", p[i]);
+    }
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/lacp.api b/src/plugins/lacp/lacp.api
new file mode 100644
index 00000000000..9eb5c7eed07
--- /dev/null
+++ b/src/plugins/lacp/lacp.api
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + + This file defines vpe control-plane API messages for + the bonding device driver +*/ + +option version = "1.0.0"; + +/** \brief Dump lacp interfaces request */ +define sw_interface_lacp_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for lacp dump request + @param sw_if_index - software index of slave interface + @param interface_name - name of slave interface + @param rx_state - rx machine state + @param tx_state - tx machine state + @param mux_state - mux machine state + @param ptx_state - ptx machine state + @param bond_interface_name - name of bond interface + @param actor_system_priority - actor system priority + @param actor_system - actor system + @param actor_key - actor key + @param actor_port_priority - actor port priority + @param actor_port_number - actor port number + @param actor_state - actor state + @param partner_system_priority - partner system priority + @param partner_system - partner system + @param partner_key - partner key + @param partner_port_priority - partner port priority + @param partner_port_number - partner port number + @param partner_state - partner state +*/ +define sw_interface_lacp_details +{ + u32 context; + u32 sw_if_index; + u8 interface_name[64]; + u32 rx_state; + u32 tx_state; + u32 mux_state; + u32 ptx_state; + u8 bond_interface_name[64]; + u16 actor_system_priority; + u8 actor_system[6]; + u16 actor_key; + u16 actor_port_priority; + u16 actor_port_number; + u8 actor_state; + u16 partner_system_priority; + u8 partner_system[6]; + u16 partner_key; + u16 partner_port_priority; + u16 partner_port_number; + u8 partner_state; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/lacp.c b/src/plugins/lacp/lacp.c new file mode 100644 index 00000000000..5fe505a4f0f --- /dev/null +++ b/src/plugins/lacp/lacp.c @@ -0,0 +1,428 @@ +/* + * 
Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdint.h> +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +#include <vppinfra/hash.h> +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +lacp_main_t lacp_main; + +/* + * Generate lacp pdu + */ +static void +lacp_fill_pdu (lacp_pdu_t * lacpdu, slave_if_t * sif) +{ + /* Actor TLV */ + lacpdu->actor.port_info = sif->actor; + + /* Partner TLV */ + lacpdu->partner.port_info = sif->partner; +} + +/* + * send a lacp pkt on an ethernet interface + */ +static void +lacp_send_ethernet_lacp_pdu (slave_if_t * sif) +{ + lacp_main_t *lm = &lacp_main; + u32 *to_next; + ethernet_lacp_pdu_t *h0; + vnet_hw_interface_t *hw; + u32 bi0; + vlib_buffer_t *b0; + vlib_frame_t *f; + vlib_main_t *vm = lm->vlib_main; + vnet_main_t *vnm = lm->vnet_main; + + /* + * see lacp_periodic_init() to understand what's already painted + * into the buffer by the packet template mechanism + */ + h0 = vlib_packet_template_get_packet + (vm, &lm->packet_templates[sif->packet_template_index], &bi0); + + if (!h0) + return; + + /* Add the interface's ethernet source address */ + hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); + + clib_memcpy (h0->ethernet.src_address, hw->hw_address, + vec_len (hw->hw_address)); + + lacp_fill_pdu (&h0->lacp, sif); + + /* Set the outbound packet length */ + b0 = vlib_get_buffer 
(vm, bi0); + b0->current_length = sizeof (ethernet_lacp_pdu_t); + b0->current_data = 0; + b0->total_length_not_including_first_buffer = 0; + + /* And the outbound interface */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index; + + /* And output the packet on the correct interface */ + f = vlib_get_frame_to_node (vm, hw->output_node_index); + + to_next = vlib_frame_vector_args (f); + to_next[0] = bi0; + f->n_vectors = 1; + + vlib_put_frame_to_node (vm, hw->output_node_index, f); + + sif->last_lacpdu_time = vlib_time_now (vm); +} + +/* + * Decide which lacp packet template to use + */ +static int +lacp_pick_packet_template (slave_if_t * sif) +{ + sif->packet_template_index = LACP_PACKET_TEMPLATE_ETHERNET; + + return 0; +} + +void +lacp_send_lacp_pdu (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_main_t *lm = &lacp_main; + + if (sif->mode != BOND_MODE_LACP) + { + lacp_stop_timer (&sif->periodic_timer); + return; + } + + if (sif->packet_template_index == (u8) ~ 0) + { + /* If we don't know how to talk to this peer, don't try again */ + if (lacp_pick_packet_template (sif)) + { + lacp_stop_timer (&sif->periodic_timer); + return; + } + } + + switch (sif->packet_template_index) + { + case LACP_PACKET_TEMPLATE_ETHERNET: + lacp_send_ethernet_lacp_pdu (sif); + break; + + default: + ASSERT (0); + } + + lacp_start_periodic_timer (lm->vlib_main, sif, sif->is_long_timeout ? 
+ LACP_SLOW_PERIODIC_TIMER : + LACP_FAST_PERIODIC_TIMER); +} + +void +lacp_periodic (vlib_main_t * vm) +{ + bond_main_t *bm = &bond_main; + lacp_main_t *lm = &lacp_main; + slave_if_t *sif; + + /* *INDENT-OFF* */ + pool_foreach (sif, bm->neighbors, + ({ + if (sif->port_enabled == 0) + continue; + + if (lacp_timer_is_running (sif->current_while_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->current_while_timer)) + { + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_TIMER_EXPIRED, &sif->rx_state); + } + + if (lacp_timer_is_running (sif->periodic_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer)) + { + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state); + } + if (lacp_timer_is_running (sif->wait_while_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->wait_while_timer)) + { + sif->ready_n = 1; + lacp_stop_timer (&sif->wait_while_timer); + lacp_selection_logic (vm, sif); + } + })); + /* *INDENT-ON* */ +} + +static void +lacp_interface_enable_disable (vlib_main_t * vm, bond_if_t * bif, + slave_if_t * sif, u8 enable) +{ + lacp_main_t *lm = &lacp_main; + uword port_number; + + if (enable) + { + port_number = clib_bitmap_first_clear (bif->port_number_bitmap); + bif->port_number_bitmap = clib_bitmap_set (bif->port_number_bitmap, + port_number, 1); + // bitmap starts at 0. Our port number starts at 1. 
+ lacp_init_neighbor (sif, bif->hw_address, port_number + 1, sif->group); + lacp_init_state_machines (vm, sif); + lm->lacp_int++; + if (lm->lacp_int == 1) + { + vlib_process_signal_event (vm, lm->lacp_process_node_index, + LACP_PROCESS_EVENT_START, 0); + } + } + else + { + lm->lacp_int--; + if (lm->lacp_int == 0) + { + vlib_process_signal_event (vm, lm->lacp_process_node_index, + LACP_PROCESS_EVENT_STOP, 0); + } + } +} + +static clib_error_t * +lacp_periodic_init (vlib_main_t * vm) +{ + lacp_main_t *lm = &lacp_main; + ethernet_lacp_pdu_t h; + ethernet_marker_pdu_t m; + u8 dst[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }; + + /* initialize binary API */ + lacp_plugin_api_hookup (vm); + + /* Create the ethernet lacp packet template */ + + memset (&h, 0, sizeof (h)); + + memcpy (h.ethernet.dst_address, dst, sizeof (h.ethernet.dst_address)); + + /* leave src address blank (fill in at send time) */ + + h.ethernet.type = htons (ETHERNET_TYPE_SLOW_PROTOCOLS); + + h.lacp.subtype = LACP_SUBTYPE; + h.lacp.version_number = LACP_ACTOR_LACP_VERSION; + + /* Actor TLV */ + h.lacp.actor.tlv_type = LACP_ACTOR_INFORMATION; + h.lacp.actor.tlv_length = sizeof (lacp_actor_partner_t); + + /* Partner TLV */ + h.lacp.partner.tlv_type = LACP_PARTNER_INFORMATION; + h.lacp.partner.tlv_length = sizeof (lacp_actor_partner_t); + + /* Collector TLV */ + h.lacp.collector.tlv_type = LACP_COLLECTOR_INFORMATION; + h.lacp.collector.tlv_length = sizeof (lacp_collector_t); + h.lacp.collector.max_delay = 0; + + /* Terminator TLV */ + h.lacp.terminator.tlv_type = LACP_TERMINATOR_INFORMATION; + h.lacp.terminator.tlv_length = 0; + + vlib_packet_template_init + (vm, &lm->packet_templates[LACP_PACKET_TEMPLATE_ETHERNET], + /* data */ &h, + sizeof (h), + /* alloc chunk size */ 8, + "lacp-ethernet"); + + /* Create the ethernet marker protocol packet template */ + + memset (&m, 0, sizeof (m)); + + memcpy (m.ethernet.dst_address, dst, sizeof (m.ethernet.dst_address)); + + /* leave src address blank (fill in at send 
time) */ + + m.ethernet.type = htons (ETHERNET_TYPE_SLOW_PROTOCOLS); + + m.marker.subtype = MARKER_SUBTYPE; + m.marker.version_number = MARKER_PROTOCOL_VERSION; + + m.marker.marker_info.tlv_length = sizeof (marker_information_t); + + /* Terminator TLV */ + m.marker.terminator.tlv_type = MARKER_TERMINATOR_INFORMATION; + m.marker.terminator.tlv_length = 0; + + vlib_packet_template_init + (vm, &lm->marker_packet_templates[MARKER_PACKET_TEMPLATE_ETHERNET], + /* data */ &m, + sizeof (m), + /* alloc chunk size */ 8, + "marker-ethernet"); + + bond_register_callback (lacp_interface_enable_disable); + + return 0; +} + +int +lacp_machine_dispatch (lacp_machine_t * machine, vlib_main_t * vm, + slave_if_t * sif, int event, int *state) +{ + lacp_fsm_state_t *transition; + int rc = 0; + + transition = &machine->tables[*state].state_table[event]; + LACP_DBG2 (sif, event, *state, machine, transition); + *state = transition->next_state; + if (transition->action) + rc = (*transition->action) ((void *) vm, (void *) sif); + + return rc; +} + +void +lacp_init_neighbor (slave_if_t * sif, u8 * hw_address, u16 port_number, + u32 group) +{ + lacp_stop_timer (&sif->wait_while_timer); + lacp_stop_timer (&sif->current_while_timer); + lacp_stop_timer (&sif->actor_churn_timer); + lacp_stop_timer (&sif->partner_churn_timer); + lacp_stop_timer (&sif->periodic_timer); + lacp_stop_timer (&sif->last_lacpdu_time); + sif->lacp_enabled = 1; + sif->loopback_port = 0; + sif->ready = 0; + sif->ready_n = 0; + sif->port_moved = 0; + sif->ntt = 0; + sif->selected = LACP_PORT_UNSELECTED; + sif->actor.state = LACP_STATE_AGGREGATION; + if (sif->ttl_in_seconds == LACP_SHORT_TIMOUT_TIME) + sif->actor.state |= LACP_STATE_LACP_TIMEOUT; + if (sif->is_passive == 0) + sif->actor.state |= LACP_STATE_LACP_ACTIVITY; + clib_memcpy (sif->actor.system, hw_address, 6); + sif->actor.system_priority = htons (LACP_DEFAULT_SYSTEM_PRIORITY); + sif->actor.key = htons (group); + sif->actor.port_number = htons (port_number); + 
sif->actor.port_priority = htons (LACP_DEFAULT_PORT_PRIORITY); + + sif->partner.system_priority = htons (LACP_DEFAULT_SYSTEM_PRIORITY); + sif->partner.key = htons (group); + sif->partner.port_number = htons (port_number); + sif->partner.port_priority = htons (LACP_DEFAULT_PORT_PRIORITY); + sif->partner.key = htons (group); + sif->partner.state = LACP_STATE_LACP_ACTIVITY; + + sif->actor_admin = sif->actor; + sif->partner_admin = sif->partner; +} + +void +lacp_init_state_machines (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_init_tx_machine (vm, sif); + lacp_init_mux_machine (vm, sif); + lacp_init_ptx_machine (vm, sif); + lacp_init_rx_machine (vm, sif); +} + +VLIB_INIT_FUNCTION (lacp_periodic_init); + +static clib_error_t * +lacp_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + lacp_main_t *lm = &lacp_main; + slave_if_t *sif; + vlib_main_t *vm = lm->vlib_main; + + sif = bond_get_slave_by_sw_if_index (sw_if_index); + if (sif) + { + sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + if (sif->port_enabled == 0) + { + if (sif->lacp_enabled) + { + lacp_init_state_machines (vm, sif); + lacp_init_neighbor (sif, sif->actor_admin.system, + ntohs (sif->actor_admin.port_number), + ntohs (sif->actor_admin.key)); + } + } + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lacp_sw_interface_up_down); + +static clib_error_t * +lacp_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + lacp_main_t *lm = &lacp_main; + slave_if_t *sif; + vnet_sw_interface_t *sw; + vlib_main_t *vm = lm->vlib_main; + vnet_interface_main_t *im = &vnm->interface_main; + + sw = pool_elt_at_index (im->sw_interfaces, hw_if_index); + sif = bond_get_slave_by_sw_if_index (sw->sw_if_index); + if (sif) + { + if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + if (sif->lacp_enabled) + { + lacp_init_state_machines (vm, sif); + lacp_init_neighbor (sif, sif->actor_admin.system, + ntohs (sif->actor_admin.port_number), + ntohs 
(sif->actor_admin.key)); + } + } + } + + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lacp_hw_interface_up_down); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Link Aggregation Control Protocol", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/lacp_all_api_h.h b/src/plugins/lacp/lacp_all_api_h.h new file mode 100644 index 00000000000..188c8fd3fb6 --- /dev/null +++ b/src/plugins/lacp/lacp_all_api_h.h @@ -0,0 +1,18 @@ +/* + * lacp_all_api_h.h - plug-in api #include file + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <lacp/lacp.api.h> diff --git a/src/plugins/lacp/lacp_api.c b/src/plugins/lacp/lacp_api.c new file mode 100644 index 00000000000..129c3605527 --- /dev/null +++ b/src/plugins/lacp/lacp_api.c @@ -0,0 +1,217 @@ +/* + *------------------------------------------------------------------ + * lacp_api.c - lacp api + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/unix/unix.h> +#include <lacp/node.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> + + +/* define message IDs */ +#include <lacp/lacp_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <lacp/lacp_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <lacp/lacp_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <lacp/lacp_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <lacp/lacp_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. 
+ * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ +#define REPLY_MACRO(t) \ +do { \ + svm_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons ((t)+lm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = htonl (rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + svm_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons ((t)+lm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = htonl (rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define foreach_lacp_plugin_api_msg \ +_(SW_INTERFACE_LACP_DUMP, sw_interface_lacp_dump) + +static void +lacp_send_sw_interface_details (vl_api_registration_t * reg, + lacp_interface_details_t * lacp_if, + u32 context) +{ + lacp_main_t *lm = &lacp_main; + vl_api_sw_interface_lacp_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_LACP_DETAILS + lm->msg_id_base); + mp->sw_if_index = htonl (lacp_if->sw_if_index); + + /* These fields in network order already */ + mp->actor_system_priority = lacp_if->actor_system_priority; + mp->actor_key = lacp_if->actor_key; + mp->actor_port_priority = lacp_if->actor_port_priority; + mp->actor_port_number = lacp_if->actor_port_number; + mp->actor_state = lacp_if->actor_state; + clib_memcpy (mp->actor_system, lacp_if->actor_system, 6); + mp->partner_system_priority = lacp_if->partner_system_priority; + mp->partner_key = lacp_if->partner_key; + mp->partner_port_priority = lacp_if->partner_port_priority; + mp->partner_port_number = 
lacp_if->partner_port_number; + mp->partner_state = lacp_if->partner_state; + + clib_memcpy (mp->partner_system, lacp_if->partner_system, 6); + clib_memcpy (mp->interface_name, lacp_if->interface_name, + MIN (ARRAY_LEN (mp->interface_name) - 1, + strlen ((const char *) lacp_if->interface_name))); + clib_memcpy (mp->bond_interface_name, lacp_if->bond_interface_name, + MIN (ARRAY_LEN (mp->bond_interface_name) - 1, + strlen ((const char *) lacp_if->bond_interface_name))); + mp->rx_state = htonl (lacp_if->rx_state); + mp->tx_state = htonl (lacp_if->tx_state); + mp->mux_state = htonl (lacp_if->mux_state); + mp->ptx_state = htonl (lacp_if->ptx_state); + + mp->context = context; + vl_api_send_msg (reg, (u8 *) mp); +} + +/** + * @brief Message handler for lacp_dump API. + * @param mp vl_api_lacp_dump_t * mp the api message + */ +void +vl_api_sw_interface_lacp_dump_t_handler (vl_api_sw_interface_lacp_dump_t * mp) +{ + int rv; + vl_api_registration_t *reg; + lacp_interface_details_t *lacpifs = NULL; + lacp_interface_details_t *lacp_if = NULL; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rv = lacp_dump_ifs (&lacpifs); + if (rv) + return; + + vec_foreach (lacp_if, lacpifs) + { + lacp_send_sw_interface_details (reg, lacp_if, mp->context); + } + + vec_free (lacpifs); +} + +#define vl_msg_name_crc_list +#include <lacp/lacp_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (lacp_main_t * lm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lm->msg_id_base); + foreach_vl_msg_name_crc_lacp; +#undef _ +} + +/* Set up the API message handling tables */ +clib_error_t * +lacp_plugin_api_hookup (vlib_main_t * vm) +{ + lacp_main_t *lm = &lacp_main; + api_main_t *am = &api_main; + u8 *name; + + /* Construct the API name */ + name = format (0, "lacp_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + lm->msg_id_base = 
vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + lm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_lacp_plugin_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (lm, am); + + vec_free (name); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/lacp_doc.md b/src/plugins/lacp/lacp_doc.md new file mode 100644 index 00000000000..f196e0aa60b --- /dev/null +++ b/src/plugins/lacp/lacp_doc.md @@ -0,0 +1,92 @@ +# VPP Link Aggregation Control Protocol (LACP) implementation {#lacp_doc} + +This document is to describe the usage of VPP LACP implementation. + + +## LACP + +The Link Aggregation Control Protocol (LACP) is an 802.3ad standard which +provides a protocol for exchanging information between Partner Systems on a +link to allow their protocol instances to reach agreement on the Link Aggregation +Group to which the link belongs and enable transmission and reception for the +higher layer. Multiple links may be bundled to the same Aggregation Group to form +a high bandwidth transmission medium and create a fault-tolerant link. + + +### Configuration + +1. Create the bond interface +create bond mode lacp [hw-addr <mac-address>] [load-balance { l2 | l23 | l34 }] + +2. Enslave the physical interface to the bond +enslave interface <interface> to <bond-interface-name> [passive] [long-timeout]" + +3. Delete the bond interface +delete bond {<interface> | sw_if_index <sw_idx>} + +4. 
Detach the slave interface from the bond +detach interface <interface> + +### Configuration example + +create bond mode lacp +set interface state BondEthernet0 up +enslave interface TenGigabitEthernet7/0/0 to BondEthernet0 +enslave interface TenGigabitEthernet7/0/1 to BondEthernet0 +enslave interface TenGigabitEthernet5/0/0 to BondEthernet0 +enslave interface TenGigabitEthernet5/0/1 to BondEthernet0 + +detach interface TenGigabitEthernet5/0/1 + +delete bond BondEthernet0 + +### Operational data + +show lacp [<interface>] [details] + +Example: + +show lacp + + +DBGvpp# sh lacp +sh lacp + actor state partner state +interface name sw_if_index bond interface exp/def/dis/col/syn/agg/tim/act exp/def/dis/col/syn/agg/tim/act +GigabitEthernet2/0/1 1 BondEthernet0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 + LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet4/0/0 2 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 + LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet4/0/1 3 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 + LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet8/0/1 7 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 + LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet8/0/0 6 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 + LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)] + RX-state: CURRENT, TX-state: TRANSMIT, 
MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet6/0/1 5 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 + LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +TenGigabitEthernet6/0/0 4 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 + LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)] + RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX +DBGvpp# + +show bond [details] + + +DBGvpp# sh bond +sh bond +interface name sw_if_index mode load balance active slaves slaves +BondEthernet0 10 lacp l2 1 1 +BondEthernet1 11 lacp l34 4 4 +BondEthernet2 12 lacp l23 2 2 +DBGvpp# + +### Debugging + +debug lacp [<interface>] <on | off>
\ No newline at end of file diff --git a/src/plugins/lacp/lacp_msg_enum.h b/src/plugins/lacp/lacp_msg_enum.h new file mode 100644 index 00000000000..138683fe3e5 --- /dev/null +++ b/src/plugins/lacp/lacp_msg_enum.h @@ -0,0 +1,31 @@ +/* + * lacp_msg_enum.h - vpp engine plug-in message enumeration + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_lacp_msg_enum_h +#define included_lacp_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum +{ +#include <lacp/lacp_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_lacp_msg_enum_h */ diff --git a/src/plugins/lacp/lacp_test.c b/src/plugins/lacp/lacp_test.c new file mode 100644 index 00000000000..0a8631d4df1 --- /dev/null +++ b/src/plugins/lacp/lacp_test.c @@ -0,0 +1,231 @@ +/* + * lacp VAT support + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <inttypes.h> + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> + +#include <vppinfra/error.h> +#include <lacp/node.h> + +#define __plugin_msg_base lacp_test_main.msg_id_base +#include <vlibapi/vat_helper_macros.h> + +/* declare message IDs */ +#include <lacp/lacp_msg_enum.h> + +/* Get CRC codes of the messages defined outside of this plugin */ +#define vl_msg_name_crc_list +#include <vpp/api/vpe_all_api_h.h> +#undef vl_msg_name_crc_list + +/* define message structures */ +#define vl_typedefs +#include <vpp/api/vpe_all_api_h.h> +#include <lacp/lacp_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <lacp/lacp_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <lacp/lacp_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. 
*/ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <lacp/lacp_all_api_h.h> +#undef vl_api_version + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} lacp_test_main_t; + +lacp_test_main_t lacp_test_main; + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SW_INTERFACE_LACP_DETAILS, sw_interface_lacp_details) + +/* lacp-dump API */ +static void vl_api_sw_interface_lacp_details_t_handler + (vl_api_sw_interface_lacp_details_t * mp) +{ + vat_main_t *vam = &vat_main; + + fformat (vam->ofp, + "%-25s %-12d %-16s %3x %3x %3x %3x %3x %3x %3x %3x " + "%4x %3x %3x %3x %3x %3x %3x %3x\n", + mp->interface_name, ntohl (mp->sw_if_index), + mp->bond_interface_name, + lacp_bit_test (mp->actor_state, 7), + lacp_bit_test (mp->actor_state, 6), + lacp_bit_test (mp->actor_state, 5), + lacp_bit_test (mp->actor_state, 4), + lacp_bit_test (mp->actor_state, 3), + lacp_bit_test (mp->actor_state, 2), + lacp_bit_test (mp->actor_state, 1), + lacp_bit_test (mp->actor_state, 0), + lacp_bit_test (mp->partner_state, 7), + lacp_bit_test (mp->partner_state, 6), + lacp_bit_test (mp->partner_state, 5), + lacp_bit_test (mp->partner_state, 4), + lacp_bit_test (mp->partner_state, 3), + lacp_bit_test (mp->partner_state, 2), + lacp_bit_test (mp->partner_state, 1), + lacp_bit_test (mp->partner_state, 0)); + fformat (vam->ofp, + " LAG ID: [(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x), " + "(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x)]\n", + ntohs (mp->actor_system_priority), mp->actor_system[0], + mp->actor_system[1], mp->actor_system[2], mp->actor_system[3], + mp->actor_system[4], mp->actor_system[5], ntohs (mp->actor_key), + ntohs (mp->actor_port_priority), ntohs (mp->actor_port_number), + ntohs (mp->partner_system_priority), mp->partner_system[0], + mp->partner_system[1], mp->partner_system[2], + mp->partner_system[3], 
mp->partner_system[4], + mp->partner_system[5], ntohs (mp->partner_key), + ntohs (mp->partner_port_priority), + ntohs (mp->partner_port_number)); + fformat (vam->ofp, + " RX-state: %U, TX-state: %U, MUX-state: %U, PTX-state: %U\n", + format_rx_sm_state, ntohl (mp->rx_state), format_tx_sm_state, + ntohl (mp->tx_state), format_mux_sm_state, ntohl (mp->mux_state), + format_ptx_sm_state, ntohl (mp->ptx_state)); +} + +static int +api_sw_interface_lacp_dump (vat_main_t * vam) +{ + lacp_test_main_t *lm = &lacp_test_main; + vl_api_sw_interface_lacp_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for sw_interface_lacp_dump"); + return -99; + } + + fformat (vam->ofp, "%-55s %-32s %-32s\n", " ", "actor state", + "partner state"); + fformat (vam->ofp, "%-25s %-12s %-16s %-31s %-31s\n", "interface name", + "sw_if_index", "bond interface", "exp/def/dis/col/syn/agg/tim/act", + "exp/def/dis/col/syn/agg/tim/act"); + + /* Get list of lacp interfaces */ + M (SW_INTERFACE_LACP_DUMP, mp); + S (mp); + + /* Use a control ping for synchronization */ + mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping)); + mp_ping->_vl_msg_id = htons (lm->ping_id); + mp_ping->client_index = vam->my_client_index; + + fformat (vam->ofp, "Sending ping id=%d\n", lm->ping_id); + + vam->result_ready = 0; + S (mp_ping); + + W (ret); + return ret; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(sw_interface_lacp_dump, "") + +static void +lacp_vat_api_hookup (vat_main_t * vam) +{ + lacp_test_main_t *lm __attribute__ ((unused)) = &lacp_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + lm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + 
foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) \ + hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + lacp_test_main_t *lm = &lacp_test_main; + u8 *name; + + lm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "lacp_%08x%c", api_version, 0); + lm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + /* Get the control ping ID */ +#define _(id,n,crc) \ + const char *id ## _CRC __attribute__ ((unused)) = #n "_" #crc; + foreach_vl_msg_name_crc_vpe; +#undef _ + lm->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC)); + + if (lm->msg_id_base != (u16) ~ 0) + lacp_vat_api_hookup (vam); + + vec_free (name); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/machine.h b/src/plugins/lacp/machine.h new file mode 100644 index 00000000000..0590b6cf101 --- /dev/null +++ b/src/plugins/lacp/machine.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LACP_MACHINE_H__ +#define __LACP_MACHINE_H__ + +#include <stdint.h> + +#define LACP_NOACTION ((int (*)(void *, void *))0) +#define LACP_ACTION_ROUTINE(rtn) ((int(*)(void *, void *))rtn) + +typedef int (*action_func) (void *, void *); + +typedef struct +{ + action_func action; + int next_state; +} lacp_fsm_state_t; + +typedef void (*debug_func) (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition); + +typedef struct +{ + lacp_fsm_state_t *state_table; +} lacp_fsm_machine_t; + +typedef struct +{ + lacp_fsm_machine_t *tables; + debug_func debug; +} lacp_machine_t; + +extern int lacp_machine_dispatch (lacp_machine_t * machine, vlib_main_t * vm, + slave_if_t * sif, int event, int *state); + +#endif /* __LACP_MACHINE_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/mux_machine.c b/src/plugins/lacp/mux_machine.c new file mode 100644 index 00000000000..f33c2642f20 --- /dev/null +++ b/src/plugins/lacp/mux_machine.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define _GNU_SOURCE + +#include <vlib/vlib.h> +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +/* + * LACP State = DETACHED + */ +static lacp_fsm_state_t lacp_mux_state_detached[] = { + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 0 BEGIN + {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING}, // event 1 SELECTED + {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING}, // event 2 STANDBY + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 3 UNSELECTED + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 4 READY + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 5 SYNC +}; + +/* + * LACP State = WAITING + */ +static lacp_fsm_state_t lacp_mux_state_waiting[] = { + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 0 BEGIN + {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING}, // event 1 SELECTED + {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING}, // event 2 STANDBY + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 3 UNSELECTED + {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED}, // event 4 READY + {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING}, // event 5 SYNC +}; + +/* + * LACP State = ATTACHED + */ +static lacp_fsm_state_t lacp_mux_state_attached[] = { + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 0 BEGIN + {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED}, // event 1 SELECTED + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 2 STANDBY + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 3 UNSELECTED + {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED}, // event 4 READY + {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING}, // event 5_SYNC +}; + +/* + * LACP State = COLLECTING_DISTRIBUTING + */ +static lacp_fsm_state_t lacp_mux_state_collecting_distributing[] = { + {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED}, // event 0 BEGIN + {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING}, // event 1 SELECTED + {LACP_ACTION_COLLECTING_DISTRIBUTING, 
LACP_MUX_STATE_COLLECTING_DISTRIBUTING}, // event 2 STANDBY + {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED}, // event 3 UNSELECTED + {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING}, // event 4 READY + {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING}, // event 5 SYNC +}; + +/* + * Per-state transition tables of the mux machine, listed in + * LACP_MUX_STATE_* order (detached, waiting, attached, + * collecting/distributing). + */ +static lacp_fsm_machine_t lacp_mux_fsm_table[] = { + {lacp_mux_state_detached}, + {lacp_mux_state_waiting}, + {lacp_mux_state_attached}, + {lacp_mux_state_collecting_distributing}, +}; + +/* The mux machine: its transition tables plus its debug trace hook */ +lacp_machine_t lacp_mux_machine = { + lacp_mux_fsm_table, + lacp_mux_debug_func, +}; + +/* + * Detach the port from its aggregator: drop the actor SYNCHRONIZATION + * bit and clear both ready flags. vm is unused. + */ +static void +lacp_detach_mux_from_aggregator (vlib_main_t * vm, slave_if_t * sif) +{ + sif->actor.state &= ~LACP_STATE_SYNCHRONIZATION; + sif->ready = 0; + sif->ready_n = 0; +} + +/* + * Attach the port to its aggregator: set the actor SYNCHRONIZATION bit. + * vm is unused. + */ +static void +lacp_attach_mux_to_aggregator (vlib_main_t * vm, slave_if_t * sif) +{ + sif->actor.state |= LACP_STATE_SYNCHRONIZATION; +} + +/* + * Mux action "detached". p1 is the vlib_main_t, p2 the slave_if_t. + * Detaches the port, clears COLLECTING/DISTRIBUTING, disables + * collecting/distributing on the bond, raises NTT towards the tx machine, + * then feeds the current selection back into the mux machine as a + * SELECTED or STANDBY event. Always returns 0. + */ +int +lacp_mux_action_detached (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_detach_mux_from_aggregator (vm, sif); + sif->actor.state &= ~LACP_STATE_COLLECTING; + bond_disable_collecting_distributing (vm, sif); + sif->actor.state &= ~LACP_STATE_DISTRIBUTING; + sif->ntt = 1; + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); + + if (sif->selected == LACP_PORT_SELECTED) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_SELECTED, &sif->mux_state); + + if (sif->selected == LACP_PORT_STANDBY) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_STANDBY, + &sif->mux_state); + + return 0; +} + +/* + * Mux action "attached". p1 is the vlib_main_t, p2 the slave_if_t. + * Attaches the port, clears COLLECTING/DISTRIBUTING, disables + * collecting/distributing on the bond and raises NTT. It then dispatches + * UNSELECTED when the port is UNSELECTED or STANDBY, and SYNC when the + * port is SELECTED and the partner reports SYNCHRONIZATION. + * Always returns 0. + */ +int +lacp_mux_action_attached (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_attach_mux_to_aggregator (vm, sif); + sif->actor.state &= ~LACP_STATE_COLLECTING; + bond_disable_collecting_distributing (vm, sif); + sif->actor.state &= ~LACP_STATE_DISTRIBUTING; + sif->ntt = 
1; + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); + + if ((sif->selected == LACP_PORT_UNSELECTED) || + (sif->selected == LACP_PORT_STANDBY)) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_UNSELECTED, &sif->mux_state); + + if ((sif->selected == LACP_PORT_SELECTED) && + (sif->partner.state & LACP_STATE_SYNCHRONIZATION)) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_SYNC, + &sif->mux_state); + return 0; +} + +/* + * Mux action "waiting". p1 is the vlib_main_t, p2 the slave_if_t. + * Starts the wait-while timer (LACP_AGGREGATE_WAIT_TIME) unless it is + * already running, then dispatches READY when the port is SELECTED and + * ready, and UNSELECTED when the port is UNSELECTED. Always returns 0. + */ +int +lacp_mux_action_waiting (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + lacp_main_t *lm = &lacp_main; + + if (!lacp_timer_is_running (sif->wait_while_timer)) + lacp_start_wait_while_timer (lm->vlib_main, sif, + LACP_AGGREGATE_WAIT_TIME); + + if ((sif->selected == LACP_PORT_SELECTED) && sif->ready) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_READY, &sif->mux_state); + + if (sif->selected == LACP_PORT_UNSELECTED) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_UNSELECTED, &sif->mux_state); + + return 0; +} + +/* + * Mux action "collecting distributing". p1 is the vlib_main_t, p2 the + * slave_if_t. Sets the actor SYNCHRONIZATION, COLLECTING and DISTRIBUTING + * bits, enables collecting/distributing on the bond and raises NTT. + * Dispatches UNSELECTED when the port is UNSELECTED or STANDBY, or when + * the partner no longer reports SYNCHRONIZATION. Always returns 0. + */ +int +lacp_mux_action_collecting_distributing (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + sif->actor.state |= LACP_STATE_SYNCHRONIZATION | LACP_STATE_COLLECTING | + LACP_STATE_DISTRIBUTING; + bond_enable_collecting_distributing (vm, sif); + sif->ntt = 1; + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); + if ((sif->selected == LACP_PORT_UNSELECTED) || + (sif->selected == LACP_PORT_STANDBY) || + !(sif->partner.state & LACP_STATE_SYNCHRONIZATION)) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_UNSELECTED, &sif->mux_state); + + + return 0; +} + +/* + * format() helper: appends the name of a mux event (an int taken from + * args) to s, or "Bad event <n>" when the value is not a known event. + */ +static u8 * +format_mux_event (u8 * s, va_list * args) +{ + static lacp_event_struct lacp_mux_event_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_mux_event +#undef _ + {.str = 
NULL} + }; + int e = va_arg (*args, int); + lacp_event_struct *event_entry = + (lacp_event_struct *) & lacp_mux_event_array; + + /* + * The final slot is the {.str = NULL} terminator rather than an event, + * so it is rejected together with any index beyond the array. + */ + if (e >= (sizeof (lacp_mux_event_array) / sizeof (*event_entry)) - 1) + s = format (s, "Bad event %d", e); + else + s = format (s, "%s", event_entry[e].str); + + return s; +} + +/* + * Debug hook of the mux machine: logs the interface name, the event, the + * old state and the next state taken from the transition entry. + */ +void +lacp_mux_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition) +{ + clib_warning ("%U-MUX: event %U, old state %U, new state %U", + format_vnet_sw_if_index_name, vnet_get_main (), + sif->sw_if_index, format_mux_event, + event, format_mux_sm_state, state, format_mux_sm_state, + transition->next_state); +} + +/* Start the mux machine for sif by dispatching the BEGIN event */ +void +lacp_init_mux_machine (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_BEGIN, + &sif->mux_state); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/mux_machine.h b/src/plugins/lacp/mux_machine.h new file mode 100644 index 00000000000..48e9a0bed4e --- /dev/null +++ b/src/plugins/lacp/mux_machine.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LACP_MUX_MACHINE_H__ +#define __LACP_MUX_MACHINE_H__ + +#include <stdint.h> +#include <lacp/machine.h> + +/* Events accepted by the mux machine: _(value, NAME, "description") */ +#define foreach_lacp_mux_event \ + _(0, BEGIN, "begin") \ + _(1, SELECTED, "selected") \ + _(2, STANDBY, "standby") \ + _(3, UNSELECTED, "unselected") \ + _(4, READY, "ready") \ + _(5, SYNC, "sync") + +/* LACP_MUX_EVENT_<NAME> constants generated from the list above */ +typedef enum +{ +#define _(a, b, c) LACP_MUX_EVENT_##b = (a), + foreach_lacp_mux_event +#undef _ +} lacp_mux_event_t; + +/* States of the mux machine: _(value, NAME, "description") */ +#define foreach_lacp_mux_sm_state \ + _(0, DETACHED, "detached") \ + _(1, WAITING, "waiting") \ + _(2, ATTACHED, "attached") \ + _(3, COLLECTING_DISTRIBUTING, "collecting distributing") + +/* LACP_MUX_STATE_<NAME> constants generated from the list above */ +typedef enum +{ +#define _(a, b, c) LACP_MUX_STATE_##b = (a), + foreach_lacp_mux_sm_state +#undef _ +} lacp_mux_sm_state_t; + +extern lacp_machine_t lacp_mux_machine; + +/* + * Mux action routines. Each takes the vlib_main_t as p1 and the + * slave_if_t as p2. + */ +int lacp_mux_action_detached (void *p1, void *p2); +int lacp_mux_action_attached (void *p1, void *p2); +int lacp_mux_action_waiting (void *p1, void *p2); +int lacp_mux_action_collecting_distributing (void *p1, void *p2); +void lacp_mux_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition); + +/* Transition-table shorthands for the action routines above */ +#define LACP_ACTION_DETACHED LACP_ACTION_ROUTINE(lacp_mux_action_detached) +#define LACP_ACTION_ATTACHED LACP_ACTION_ROUTINE(lacp_mux_action_attached) +#define LACP_ACTION_WAITING LACP_ACTION_ROUTINE(lacp_mux_action_waiting) +#define LACP_ACTION_COLLECTING_DISTRIBUTING \ + LACP_ACTION_ROUTINE(lacp_mux_action_collecting_distributing) + +/* + * Arm the wait-while timer of sif so that it expires 'expiration' after + * the current vlib time. The duration is an f64, matching the timer + * itself, so a floating point constant such as LACP_AGGREGATE_WAIT_TIME + * (or any fractional value) is not truncated on the way in. + */ +static inline void +lacp_start_wait_while_timer (vlib_main_t * vm, slave_if_t * sif, + f64 expiration) +{ + sif->wait_while_timer = vlib_time_now (vm) + expiration; +} + +#endif /* __LACP_MUX_MACHINE_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/node.c b/src/plugins/lacp/node.c new file mode 100644 index 00000000000..8eb78876461 --- /dev/null +++ b/src/plugins/lacp/node.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017 
Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include <vnet/bonding/node.h> +#include <vnet/ethernet/packet.h> +#include <lacp/node.h> + +/* + * Bit number / name pairs, one per foreach_lacp_state_flag entry. + * The name is the stringified second macro argument; the table ends + * with an entry whose str is NULL. + */ +lacp_state_struct lacp_state_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_state_flag +#undef _ + {.str = NULL} +}; + +/* Forward declaration; the node is registered at the end of this file */ +static vlib_node_registration_t lacp_process_node; + +/** \file + + 2 x LACP graph nodes: an "interior" node to process + incoming announcements, and a "process" node to periodically + send announcements. + + The interior node is neither pipelined nor dual-looped, because + it would be very unusual to see more than one LACP packet in + a given input frame. So, it's a very simple / straightforward + example. +*/ + +/* + * packet counter strings + * Dump these counters via the "show error" CLI command + */ +static char *lacp_error_strings[] = { +#define _(sym,string) string, + foreach_lacp_error +#undef _ +}; + +/* + * We actually send all lacp pkts to the "error" node after scanning + * them, so the graph node has only one next-index. The "error-drop" + * node automatically bumps our per-node packet counters for us. 
+ */ +typedef enum +{ + LACP_INPUT_NEXT_NORMAL, + LACP_INPUT_N_NEXT, +} lacp_next_t; + +/* + * Process a frame of lacp packets + * Expect 1 packet / frame + */ +static uword +lacp_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from; + lacp_input_trace_t *t0; + uword n_trace = vlib_get_trace_count (vm, node); + + from = vlib_frame_vector_args (frame); /* array of buffer indices */ + n_left_from = frame->n_vectors; /* number of buffer indices */ + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0, error0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + next0 = LACP_INPUT_NEXT_NORMAL; + + /* scan this lacp pkt. error0 is the counter index to bump */ + error0 = lacp_input (vm, b0, bi0); + b0->error = node->errors[error0]; + + /* If this pkt is traced, snapshoot the data */ + if (PREDICT_FALSE (n_trace > 0)) + { + int len; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + len = (b0->current_length < sizeof (t0->pkt)) + ? 
b0->current_length : sizeof (t0->pkt); + t0->len = len; + clib_memcpy (&t0->pkt, vlib_buffer_get_current (b0), len); + } + /* push this pkt to the next graph node, always error-drop */ + vlib_set_next_frame_buffer (vm, node, next0, bi0); + + from += 1; + n_left_from -= 1; + } + + return frame->n_vectors; +} + +/* + * lacp input graph node declaration + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (lacp_input_node, static) = { + .function = lacp_node_fn, + .name = "lacp-input", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = LACP_N_ERROR, + .error_strings = lacp_error_strings, + + .format_trace = lacp_input_format_trace, + + .n_next_nodes = LACP_INPUT_N_NEXT, + .next_nodes = { + [LACP_INPUT_NEXT_NORMAL] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * lacp periodic function + */ +static uword +lacp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + lacp_main_t *lm = &lacp_main; + f64 poll_time_remaining; + uword event_type, *event_data = 0; + u8 enabled = 0; + + /* So we can send events to the lacp process */ + lm->lacp_process_node_index = lacp_process_node.index; + + ethernet_register_input_type (vm, ETHERNET_TYPE_SLOW_PROTOCOLS /* LACP */ , + lacp_input_node.index); + + poll_time_remaining = 0.2; + while (1) + { + if (enabled) + poll_time_remaining = + vlib_process_wait_for_event_or_clock (vm, poll_time_remaining); + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: /* no events => timeout */ + break; + case LACP_PROCESS_EVENT_START: + enabled = 1; + break; + case LACP_PROCESS_EVENT_STOP: + enabled = 0; + continue; + default: + clib_warning ("BUG: event type 0x%wx", event_type); + break; + } + if (event_data) + _vec_len (event_data) = 0; + + if (vlib_process_suspend_time_is_zero (poll_time_remaining)) + { + lacp_periodic (vm); + poll_time_remaining = 0.2; + } + } + + return 0; +} + +/* + * lacp periodic node 
declaration + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (lacp_process_node, static) = { + .function = lacp_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "lacp-process", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/node.h b/src/plugins/lacp/node.h new file mode 100644 index 00000000000..26cf7a36ccc --- /dev/null +++ b/src/plugins/lacp/node.h @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_lacp_node_h__ +#define __included_lacp_node_h__ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/format.h> +#include <vppinfra/hash.h> +#include <lacp/protocol.h> +#include <lacp/rx_machine.h> +#include <lacp/tx_machine.h> +#include <lacp/mux_machine.h> +#include <lacp/ptx_machine.h> + +typedef enum +{ + LACP_PACKET_TEMPLATE_ETHERNET, + LACP_N_PACKET_TEMPLATES, +} lacp_packet_template_id_t; + +typedef enum +{ + MARKER_PACKET_TEMPLATE_ETHERNET, + MARKER_N_PACKET_TEMPLATES, +} marker_packet_template_id_t; + +/* + * Events understood by the lacp process node. This is a typedef: without + * it the declaration would define an object named lacp_process_event_t in + * every file that includes this header. + */ +typedef enum +{ + LACP_PROCESS_EVENT_START = 1, + LACP_PROCESS_EVENT_STOP = 2, +} lacp_process_event_t; + +/* + * Emit a clib_warning when debugging is enabled globally or on n. + * n must point to an object with a debug member. + */ +#define LACP_DBG(n, args...) 
\ + { \ + lacp_main_t *_lm = &lacp_main; \ + if (_lm->debug || (n)->debug) \ + clib_warning (args); \ + } + +/* + * Call the debug hook of machine m with (n, e, s, t) when the machine has + * a hook and debugging is enabled globally or on n. + */ +#define LACP_DBG2(n, e, s, m, t) \ + { \ + lacp_main_t *_lm = &lacp_main; \ + if ((m)->debug && (_lm->debug || (n)->debug)) \ + (*(m)->debug)(n, e, s, t); \ + } + +/* Packet counters */ +#define foreach_lacp_error \ +_ (NONE, "good lacp packets -- consumed") \ +_ (CACHE_HIT, "good lacp packets -- cache hit") \ +_ (UNSUPPORTED, "unsupported slow protocol packets") \ +_ (TOO_SMALL, "bad lacp packets -- packet too small") \ +_ (BAD_TLV, "bad lacp packets -- bad TLV length") \ +_ (DISABLED, "lacp packets received on disabled interfaces") + +typedef enum +{ +#define _(sym,str) LACP_ERROR_##sym, + foreach_lacp_error +#undef _ + LACP_N_ERROR, +} lacp_error_t; + +/* lacp packet trace capture */ +typedef struct +{ + u32 len; + union + { + marker_pdu_t marker; + lacp_pdu_t lacpdu; + } pkt; +} lacp_input_trace_t; + +/** LACP interface details struct */ +typedef struct +{ + u32 sw_if_index; + u8 interface_name[64]; + u32 rx_state; + u32 tx_state; + u32 mux_state; + u32 ptx_state; + u8 bond_interface_name[64]; + u16 actor_system_priority; + u8 actor_system[6]; + u16 actor_key; + u16 actor_port_priority; + u16 actor_port_number; + u8 actor_state; + u16 partner_system_priority; + u8 partner_system[6]; + u16 partner_key; + u16 partner_port_priority; + u16 partner_port_number; + u8 partner_state; +} lacp_interface_details_t; + +typedef struct +{ + /** API message ID base */ + u16 msg_id_base; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* Background process node index */ + u32 lacp_process_node_index; + + /* Packet templates for different encap types */ + vlib_packet_template_t packet_templates[LACP_N_PACKET_TEMPLATES]; + + /* Marker packet templates for different encap types */ + vlib_packet_template_t marker_packet_templates[MARKER_N_PACKET_TEMPLATES]; + + /* LACP interface count */ + u32 lacp_int; + + /* debug is on or off */ + u8 debug; 
+} lacp_main_t; + +extern lacp_state_struct lacp_state_array[]; +extern lacp_main_t lacp_main; + +clib_error_t *lacp_plugin_api_hookup (vlib_main_t * vm); +int lacp_dump_ifs (lacp_interface_details_t ** out_bondids); +lacp_error_t lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0); +void lacp_periodic (vlib_main_t * vm); +u8 *lacp_input_format_trace (u8 * s, va_list * args); +void lacp_init_neighbor (slave_if_t * sif, u8 * hw_address, + u16 port_number, u32 group); +void lacp_init_state_machines (vlib_main_t * vm, slave_if_t * sif); +void lacp_init_rx_machine (vlib_main_t * vm, slave_if_t * sif); +void lacp_init_tx_machine (vlib_main_t * vm, slave_if_t * sif); +void lacp_init_ptx_machine (vlib_main_t * vm, slave_if_t * sif); +void lacp_init_mux_machine (vlib_main_t * vm, slave_if_t * sif); +void lacp_selection_logic (vlib_main_t * vm, slave_if_t * sif); +void lacp_send_lacp_pdu (vlib_main_t * vm, slave_if_t * sif); + +/* Stop a timer; a value of 0.0 means "not running" */ +static inline void +lacp_stop_timer (f64 * timer) +{ + *timer = 0.0; +} + +/* Return non-zero when the timer has been started (is not 0.0) */ +static inline u8 +lacp_timer_is_running (f64 timer) +{ + return (timer != 0.0); +} + +/* Return non-zero when the current vlib time has reached the timer value */ +static inline u8 +lacp_timer_is_expired (vlib_main_t * vm, f64 timer) +{ + f64 now = vlib_time_now (vm); + + return (now >= timer); +} + +/* + * format() helper: appends the name of an rx machine state (an int taken + * from args) to s, or "Bad state <n>" when the value is not a known state. + */ +static inline u8 * +format_rx_sm_state (u8 * s, va_list * args) +{ + lacp_state_struct lacp_rx_sm_state_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_rx_sm_state +#undef _ + {.str = NULL} + }; + int state = va_arg (*args, int); + lacp_state_struct *state_entry = + (lacp_state_struct *) & lacp_rx_sm_state_array; + + /* the last slot is the NULL terminator, not a state */ + if (state >= (sizeof (lacp_rx_sm_state_array) / sizeof (*state_entry)) - 1) + s = format (s, "Bad state %d", state); + else + s = format (s, "%s", state_entry[state].str); + + return s; +} + +/* + * format() helper: appends the name of a tx machine state (an int taken + * from args) to s, or "Bad state <n>" when the value is not a known state. + */ +static inline u8 * +format_tx_sm_state (u8 * s, va_list * args) +{ + lacp_state_struct lacp_tx_sm_state_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_tx_sm_state +#undef _ + {.str = NULL} + }; + int 
state = va_arg (*args, int); + lacp_state_struct *state_entry = + (lacp_state_struct *) & lacp_tx_sm_state_array; + + /* the last slot is the NULL terminator, not a state */ + if (state >= (sizeof (lacp_tx_sm_state_array) / sizeof (*state_entry)) - 1) + s = format (s, "Bad state %d", state); + else + s = format (s, "%s", state_entry[state].str); + + return s; +} + +/* + * format() helper: appends the name of a mux machine state (an int taken + * from args) to s, or "Bad state <n>" when the value is not a known state. + */ +static inline u8 * +format_mux_sm_state (u8 * s, va_list * args) +{ + lacp_state_struct lacp_mux_sm_state_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_mux_sm_state +#undef _ + {.str = NULL} + }; + int state = va_arg (*args, int); + lacp_state_struct *state_entry = + (lacp_state_struct *) & lacp_mux_sm_state_array; + + /* the last slot is the NULL terminator, not a state */ + if (state >= (sizeof (lacp_mux_sm_state_array) / sizeof (*state_entry)) - 1) + s = format (s, "Bad state %d", state); + else + s = format (s, "%s", state_entry[state].str); + + return s; +} + +/* + * format() helper: appends the name of a periodic tx machine state (an int + * taken from args) to s, or "Bad state <n>" when the value is not a known + * state. + */ +static inline u8 * +format_ptx_sm_state (u8 * s, va_list * args) +{ + lacp_state_struct lacp_ptx_sm_state_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_ptx_sm_state +#undef _ + {.str = NULL} + }; + int state = va_arg (*args, int); + lacp_state_struct *state_entry = + (lacp_state_struct *) & lacp_ptx_sm_state_array; + + /* the last slot is the NULL terminator, not a state */ + if (state >= (sizeof (lacp_ptx_sm_state_array) / sizeof (*state_entry)) - 1) + s = format (s, "Bad state %d", state); + else + s = format (s, "%s", state_entry[state].str); + + return s; +} + +/* Return 1 when bit number 'bit' is set in val, 0 otherwise */ +static inline int +lacp_bit_test (u8 val, u8 bit) +{ + if (val & (1 << bit)) + return 1; + else + return 0; +} + +#endif /* __included_lacp_node_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/protocol.h b/src/plugins/lacp/protocol.h new file mode 100644 index 00000000000..05a3f04a227 --- /dev/null +++ b/src/plugins/lacp/protocol.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_lacp_protocol_h__ +#define __included_lacp_protocol_h__ + +#include <vnet/ethernet/ethernet.h> +#include <vnet/bonding/node.h> + +#define LACP_CHURN_DETECTION_TIME 60 +#define LACP_AGGREGATE_WAIT_TIME 2.0 + +#define LACP_SUBTYPE 1 +#define LACP_ACTOR_LACP_VERSION 1 + +/* TLV types carried in a LACPDU: _(NAME, value) */ +#define foreach_lacp_tlv \ + _ (TERMINATOR_INFORMATION, 0) \ + _ (ACTOR_INFORMATION, 1) \ + _ (PARTNER_INFORMATION , 2) \ + _ (COLLECTOR_INFORMATION, 3) + +typedef enum +{ +#define _(f,n) LACP_##f = (n), + foreach_lacp_tlv +#undef _ +} lacp_tlv_t; + +/* Port selection values: _(NAME, value) */ +#define foreach_lacp_port \ + _ (UNSELECTED, 0) \ + _ (SELECTED, 1) \ + _ (STANDBY, 2) + +typedef enum +{ +#define _(f,n) LACP_PORT_##f = (n), + foreach_lacp_port +#undef _ +} lacp_port_t; + +/* Port state */ +#define foreach_lacp_state \ + _(0, LACP_ACTIVITY, "activity") \ + _(1, LACP_TIMEOUT, "lacp timeout") \ + _(2, AGGREGATION, "aggregation") \ + _(3, SYNCHRONIZATION, "synchronization") \ + _(4, COLLECTING, "collecting") \ + _(5, DISTRIBUTING, "distributing") \ + _(6, DEFAULTED, "defaulted") \ + _(7, EXPIRED, "expired") + +/* LACP_STATE_<NAME> bit masks (1 << bit number) for the list above */ +typedef enum +{ +#define _(a, b, c) LACP_STATE_##b = (1 << a), + foreach_lacp_state +#undef _ +} lacp_state_t; + +/* + * Same bits as foreach_lacp_state, but carrying the full LACP_STATE_* + * spelling. Users that stringify the second argument show these names + * verbatim, so they must match the lacp_state_t constants. + */ +#define foreach_lacp_state_flag \ + _(0, LACP_STATE_LACP_ACTIVITY, "activity") \ + _(1, LACP_STATE_LACP_TIMEOUT, "lacp timeout") \ + _(2, LACP_STATE_AGGREGATION, "aggregation") \ + _(3, LACP_STATE_SYNCHRONIZATION, "synchronization") \ + _(4, 
LACP_STATE_COLLECTING, "collecting") \ + _(5, LACP_STATE_DISTRIBUTING, "distributing") \ + _(6, LACP_STATE_DEFAULTED, "defaulted") \ + _(7, LACP_STATE_EXPIRED, "expired") + +/* Bit number / printable name pair for a state flag or machine state */ +typedef struct +{ + u8 bit; + char *str; +} lacp_state_struct; + +/* Event number / printable name pair for a state machine event */ +typedef struct +{ + u8 bit; + char *str; +} lacp_event_struct; + +#define LACP_MAX_TX_IN_SECOND 3 +#define LACP_DEFAULT_PORT_PRIORITY 0x00ff +#define LACP_DEFAULT_SYSTEM_PRIORITY 0xffff + +/* Actor or partner information TLV (packed) */ +typedef CLIB_PACKED (struct + { + u8 tlv_type; + u8 tlv_length; + lacp_port_info_t port_info; u8 reserved[3]; + }) lacp_actor_partner_t; + +/* Collector information TLV (packed) */ +typedef CLIB_PACKED (struct + { + u8 tlv_type; u8 tlv_length; u16 max_delay; + u8 reserved[12]; + }) lacp_collector_t; + +/* Terminator TLV followed by padding (packed) */ +typedef CLIB_PACKED (struct + { + u8 tlv_type; u8 tlv_length; + u8 pad[50]; + }) lacp_terminator_t; + +/* LACPDU: subtype, version and the four TLVs above (packed) */ +typedef CLIB_PACKED (struct + { + u8 subtype; u8 version_number; + lacp_actor_partner_t actor; lacp_actor_partner_t partner; + lacp_collector_t collector; lacp_terminator_t terminator; + }) lacp_pdu_t; + +/* LACPDU preceded by its ethernet header (packed) */ +typedef CLIB_PACKED (struct + { + ethernet_header_t ethernet; lacp_pdu_t lacp; + }) ethernet_lacp_pdu_t; + +#define MARKER_SUBTYPE 2 +#define MARKER_PROTOCOL_VERSION 1 + +/* TLV types carried in a marker PDU: _(NAME, value) */ +#define foreach_marker_tlv \ + _ (TERMINATOR_INFORMATION, 0) \ + _ (INFORMATION, 1) \ + _ (RESPONSE_INFORMATION , 2) + +typedef enum +{ +#define _(f,n) MARKER_##f = (n), + foreach_marker_tlv +#undef _ +} marker_tlv_t; + +/* Marker terminator TLV followed by reserved bytes (packed) */ +typedef CLIB_PACKED (struct + { + u8 tlv_type; u8 tlv_length; + u8 reserved[90]; + }) marker_terminator_t; + +/* Marker information TLV (packed) */ +typedef CLIB_PACKED (struct + { + u8 tlv_type; + u8 tlv_length; + u16 requester_port; u8 requester_system[6]; + u32 requester_transaction_id; u8 pad[2]; + }) marker_information_t; + +/* Marker PDU: subtype, version, information TLV and terminator (packed) */ +typedef CLIB_PACKED (struct + { + u8 subtype; + u8 version_number; + marker_information_t marker_info; + marker_terminator_t terminator; + }) marker_pdu_t; + +/* Marker PDU preceded by its ethernet header (packed) */ +typedef CLIB_PACKED (struct + { + ethernet_header_t ethernet; marker_pdu_t marker; + }) ethernet_marker_pdu_t; + +#endif /* __included_lacp_protocol_h__ */ 
+ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/ptx_machine.c b/src/plugins/lacp/ptx_machine.c new file mode 100644 index 00000000000..ac83444b08a --- /dev/null +++ b/src/plugins/lacp/ptx_machine.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE + +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +/* + * LACP State = NO_PERIODIC + * + * In every table below a row is {action, next state} for the event named + * at the end of the row. + */ +static lacp_fsm_state_t lacp_ptx_state_no_periodic[] = { + {LACP_ACTION_NO_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC}, // event 0 BEGIN + {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 1 LONG_TIMEOUT + {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 2 TIMER_EXPIRED + {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 3 SHORT_TIMEOUT +}; + +/* + * LACP State = FAST_PERIODIC + */ +static lacp_fsm_state_t lacp_ptx_state_fast_periodic[] = { + {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC}, // event 0 BEGIN + {LACP_ACTION_SLOW_PERIODIC, LACP_PTX_STATE_SLOW_PERIODIC}, // event 1 LONG_TIMEOUT + {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX}, // event 2 TIMER_EXPIRED + {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC}, // event 3 SHORT_TIMEOUT +}; + +/* + * LACP State = SLOW_PERIODIC + */ +static lacp_fsm_state_t lacp_ptx_state_slow_periodic[] = { + {LACP_ACTION_NO_PERIODIC, 
LACP_PTX_STATE_NO_PERIODIC}, // event 0 BEGIN + {LACP_ACTION_SLOW_PERIODIC, LACP_PTX_STATE_SLOW_PERIODIC}, // event 1 LONG_TIMEOUT + {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX}, // event 2 TIMER_EXPIRED + {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC}, // event 3 SHORT_TIMEOUT +}; + +/* + * LACP State = PERIODIC_TX + */ +static lacp_fsm_state_t lacp_ptx_state_periodic_tx[] = { + {LACP_ACTION_NO_PERIODIC, LACP_PTX_STATE_NO_PERIODIC}, // event 0 BEGIN + {LACP_NOACTION, LACP_PTX_STATE_PERIODIC_TX}, // event 1 LONG_TIMEOUT + {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX}, // event 2 TIMER_EXPIRED + {LACP_NOACTION, LACP_PTX_STATE_PERIODIC_TX}, // event 3 SHORT_TIMEOUT +}; + + +/* + * Per-state transition tables of the periodic tx machine, listed in + * LACP_PTX_STATE_* order. + */ +static lacp_fsm_machine_t lacp_ptx_fsm_table[] = { + {lacp_ptx_state_no_periodic}, + {lacp_ptx_state_fast_periodic}, + {lacp_ptx_state_slow_periodic}, + {lacp_ptx_state_periodic_tx}, +}; + +/* The periodic tx machine: its transition tables plus its debug hook */ +lacp_machine_t lacp_ptx_machine = { + lacp_ptx_fsm_table, + lacp_ptx_debug_func, +}; + +/* + * Periodic tx action "no periodic". p1 is the vlib_main_t, p2 the + * slave_if_t. Stops the periodic timer and dispatches BEGIN to the + * periodic tx machine again. Always returns 0. + */ +int +lacp_ptx_action_no_periodic (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_stop_timer (&sif->periodic_timer); + + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_BEGIN, &sif->ptx_state); + + return 0; +} + +/* + * Periodic tx action "slow periodic". p1 is the vlib_main_t, p2 the + * slave_if_t. Notes whether a running periodic timer had already + * expired, restarts it with LACP_SLOW_PERIODIC_TIMER, then dispatches + * TIMER_EXPIRED if it had expired or the partner has the LACP_TIMEOUT + * bit set. Always returns 0. + */ +int +lacp_ptx_action_slow_periodic (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + u8 timer_expired; + lacp_main_t *lm = &lacp_main; + + if (lacp_timer_is_running (sif->periodic_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer)) + timer_expired = 1; + else + timer_expired = 0; + + lacp_start_periodic_timer (lm->vlib_main, sif, LACP_SLOW_PERIODIC_TIMER); + + if (timer_expired || (sif->partner.state & LACP_STATE_LACP_TIMEOUT)) + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state); + + return 0; +} + +/* + * Periodic tx action "fast periodic". p1 is the vlib_main_t, p2 the + * slave_if_t. Notes whether a running periodic timer had already + * expired, restarts it with LACP_FAST_PERIODIC_TIMER, dispatches + * TIMER_EXPIRED if it had expired, and dispatches LONG_TIMEOUT when the + * partner has the LACP_TIMEOUT bit clear. Always returns 0. + */ +int +lacp_ptx_action_fast_periodic (void *p1, void *p2) +{ + vlib_main_t *vm = 
(vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + u8 timer_expired; + lacp_main_t *lm = &lacp_main; + + if (lacp_timer_is_running (sif->periodic_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer)) + timer_expired = 1; + else + timer_expired = 0; + + lacp_start_periodic_timer (lm->vlib_main, sif, LACP_FAST_PERIODIC_TIMER); + + if (timer_expired) + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state); + + if (!(sif->partner.state & LACP_STATE_LACP_TIMEOUT)) + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_LONG_TIMEOUT, &sif->ptx_state); + + return 0; +} + +/* + * Periodic tx action "timer expired". p1 is the vlib_main_t, p2 the + * slave_if_t. Raises NTT towards the tx machine, then dispatches + * SHORT_TIMEOUT when the partner has the LACP_TIMEOUT bit set and + * LONG_TIMEOUT otherwise. Always returns 0. + */ +int +lacp_ptx_action_timer_expired (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + sif->ntt = 1; + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); + if (sif->partner.state & LACP_STATE_LACP_TIMEOUT) + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_SHORT_TIMEOUT, &sif->ptx_state); + else + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_LONG_TIMEOUT, &sif->ptx_state); + + return 0; +} + +/* + * format() helper: appends the name of a periodic tx event (an int taken + * from args) to s, or "Bad event <n>" when the value is not a known event. + */ +static u8 * +format_ptx_event (u8 * s, va_list * args) +{ + static lacp_event_struct lacp_ptx_event_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_ptx_event +#undef _ + {.str = NULL} + }; + int e = va_arg (*args, int); + lacp_event_struct *event_entry = + (lacp_event_struct *) & lacp_ptx_event_array; + + /* + * The final slot is the {.str = NULL} terminator rather than an event, + * so it is rejected together with any index beyond the array. + */ + if (e >= (sizeof (lacp_ptx_event_array) / sizeof (*event_entry)) - 1) + s = format (s, "Bad event %d", e); + else + s = format (s, "%s", event_entry[e].str); + + return s; +} + +/* + * Debug hook of the periodic tx machine: logs the interface name, the + * event, the old state and the next state taken from the transition entry. + */ +void +lacp_ptx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition) +{ + clib_warning ("%U-PTX: event %U, old state %U, new state %U", + format_vnet_sw_if_index_name, vnet_get_main (), + sif->sw_if_index, format_ptx_event, + event, format_ptx_sm_state, state, 
format_ptx_sm_state, + transition->next_state); +} + +/* Start the periodic tx machine for sif by dispatching the BEGIN event */ +void +lacp_init_ptx_machine (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, LACP_PTX_EVENT_BEGIN, + &sif->ptx_state); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/ptx_machine.h b/src/plugins/lacp/ptx_machine.h new file mode 100644 index 00000000000..a9af4bb89d3 --- /dev/null +++ b/src/plugins/lacp/ptx_machine.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LACP_PTX_MACHINE_H__ +#define __LACP_PTX_MACHINE_H__ + +#include <stdint.h> +#include <lacp/machine.h> + +/* Events accepted by the periodic tx machine: _(value, NAME, "description") */ +#define foreach_lacp_ptx_event \ + _(0, BEGIN, "begin") \ + _(1, LONG_TIMEOUT, "long timeout") \ + _(2, TIMER_EXPIRED, "timer expired") \ + _(3, SHORT_TIMEOUT, "short timeout") + +/* LACP_PTX_EVENT_<NAME> constants generated from the list above */ +typedef enum +{ +#define _(a, b, c) LACP_PTX_EVENT_##b = (a), + foreach_lacp_ptx_event +#undef _ +} lacp_ptx_event_t; + +/* States of the periodic tx machine: _(value, NAME, "description") */ +#define foreach_lacp_ptx_sm_state \ + _(0, NO_PERIODIC, "no periodic") \ + _(1, FAST_PERIODIC, "fast periodic") \ + _(2, SLOW_PERIODIC, "slow periodic") \ + _(3, PERIODIC_TX, "periodic transmission") + +/* LACP_PTX_STATE_<NAME> constants generated from the list above */ +typedef enum +{ +#define _(a, b, c) LACP_PTX_STATE_##b = (a), + foreach_lacp_ptx_sm_state +#undef _ +} lacp_ptx_sm_state_t; + +extern lacp_machine_t lacp_ptx_machine; + +/* + * Periodic tx action routines. Each takes the vlib_main_t as p1 and the + * slave_if_t as p2. + */ +int lacp_ptx_action_no_periodic (void *p1, void *p2); +int lacp_ptx_action_slow_periodic (void *p1, void *p2); +int lacp_ptx_action_fast_periodic (void *p1, void *p2); +int lacp_ptx_action_timer_expired (void *p1, void *p2); +void lacp_ptx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition); + +/* Transition-table shorthands for the action routines above */ +#define LACP_ACTION_NO_PERIODIC \ + LACP_ACTION_ROUTINE(lacp_ptx_action_no_periodic) +#define LACP_ACTION_SLOW_PERIODIC \ + LACP_ACTION_ROUTINE(lacp_ptx_action_slow_periodic) +#define LACP_ACTION_FAST_PERIODIC \ + LACP_ACTION_ROUTINE(lacp_ptx_action_fast_periodic) +#define LACP_ACTION_TIMER_EXPIRED \ + LACP_ACTION_ROUTINE(lacp_ptx_action_timer_expired) + +/* + * Arm the periodic timer of sif so that it expires 'expiration' after + * the current vlib time. + */ +static inline void +lacp_start_periodic_timer (vlib_main_t * vm, slave_if_t * sif, u8 expiration) +{ + sif->periodic_timer = vlib_time_now (vm) + expiration; +} + +#endif /* __LACP_PTX_MACHINE_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/rx_machine.c b/src/plugins/lacp/rx_machine.c new file mode 100644 index 00000000000..374e3f84939 --- /dev/null +++ b/src/plugins/lacp/rx_machine.c @@ -0,0 +1,425 
@@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE + +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +/* + * LACP State = INITIALIZE + */ +static lacp_fsm_state_t lacp_rx_state_initialize[] = { + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED}, // event 0 BEGIN + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED}, // event 1 PORT_DISABLED + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED}, // event 2 PORT_MOVED + {LACP_NOACTION, LACP_RX_STATE_INITIALIZE}, // event 3 LACP_ENABLED + {LACP_NOACTION, LACP_RX_STATE_INITIALIZE}, // event 4 LACP_DISABLED + {LACP_NOACTION, LACP_RX_STATE_INITIALIZE}, // event 5 PDU_RECEIVED + {LACP_NOACTION, LACP_RX_STATE_INITIALIZE}, // event 6 TIMER_EXPIRED +}; + +/* + * LACP State = PORT_DISABLED + */ +static lacp_fsm_state_t lacp_rx_state_port_disabled[] = { + {LACP_ACTION_PORT_DISABLED, LACP_RX_STATE_PORT_DISABLED}, // event 0 BEGIN + {LACP_ACTION_PORT_DISABLED, LACP_RX_STATE_PORT_DISABLED}, // event 1 PORT_DISABLED + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE}, // event 2 PORT_MOVED + {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED}, // event 3 LACP_ENABLED + {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED}, // event 4 LACP_DISABLED + {LACP_NOACTION, LACP_RX_STATE_PORT_DISABLED}, // event 5 PDU_RECEIVED + {LACP_NOACTION, LACP_RX_STATE_PORT_DISABLED}, // event 6 TIMER_EXPIRED +}; + +/* + * LACP State = EXPIRED + 
*/ +static lacp_fsm_state_t lacp_rx_state_expired[] = { + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE}, // event 0 BEGIN + {LACP_NOACTION, LACP_RX_STATE_EXPIRED}, // event 1 PORT_DISABLED + {LACP_NOACTION, LACP_RX_STATE_EXPIRED}, // event 2 PORT_MOVED + {LACP_NOACTION, LACP_RX_STATE_EXPIRED}, // event 3 LACP_ENABLED + {LACP_NOACTION, LACP_RX_STATE_EXPIRED}, // event 4 LACP_DISABLED + {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT}, // event 5 PDU_RECEIVED + {LACP_ACTION_DEFAULTED, LACP_RX_STATE_DEFAULTED}, // event 6 TIMER_EXPIRED +}; + +/* + * LACP State = LACP_DISABLED + */ +static lacp_fsm_state_t lacp_rx_state_lacp_disabled[] = { + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE}, // event 0 BEGIN + {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED}, // event 1 PORT_DISABLED + {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED}, // event 2 PORT_MOVED + {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED}, // event 3 LACP_ENABLED XXX + {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED}, // event 4 LACP_DISABLED + {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED}, // event 5 PDU_RECEIVED + {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED}, // event 6 TIMER_EXPIRED +}; + +/* + * LACP State = DEFAULTED + */ +static lacp_fsm_state_t lacp_rx_state_defaulted[] = { + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE}, // event 0 BEGIN + {LACP_NOACTION, LACP_RX_STATE_DEFAULTED}, // event 1 PORT_DISABLED + {LACP_NOACTION, LACP_RX_STATE_DEFAULTED}, // event 2 PORT_MOVED + {LACP_NOACTION, LACP_RX_STATE_DEFAULTED}, // event 3 LACP_ENABLED + {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED}, // event 4 LACP_DISABLED + {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT}, // event 5 PDU_RECEIVED + {LACP_ACTION_DEFAULTED, LACP_RX_STATE_DEFAULTED}, // event 6 TIMER_EXPIRED +}; + +/* + * LACP State = CURRENT + */ +static lacp_fsm_state_t lacp_rx_state_current[] = { + {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE}, // event 0 BEGIN + {LACP_NOACTION, LACP_RX_STATE_CURRENT}, // event 1 
PORT_DISABLED + {LACP_NOACTION, LACP_RX_STATE_CURRENT}, // event 1 PORT_MOVED + {LACP_NOACTION, LACP_RX_STATE_CURRENT}, // event 2 LACP_ENABLED + {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED}, // event 3 LACP_DISABLED + {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT}, // event 4 PDU_RECEIVED + {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED}, // event 5 TIMER_EXPIRED +}; + +static lacp_fsm_machine_t lacp_rx_fsm_table[] = { + {lacp_rx_state_initialize}, + {lacp_rx_state_port_disabled}, + {lacp_rx_state_expired}, + {lacp_rx_state_lacp_disabled}, + {lacp_rx_state_defaulted}, + {lacp_rx_state_current}, +}; + +lacp_machine_t lacp_rx_machine = { + lacp_rx_fsm_table, + lacp_rx_debug_func, +}; + +static void +lacp_set_port_unselected (vlib_main_t * vm, slave_if_t * sif) +{ + sif->selected = LACP_PORT_UNSELECTED; + + switch (sif->mux_state) + { + case LACP_MUX_STATE_DETACHED: + break; + case LACP_MUX_STATE_WAITING: + break; + case LACP_MUX_STATE_ATTACHED: + return; + break; + case LACP_MUX_STATE_COLLECTING_DISTRIBUTING: + if (sif->partner.state & LACP_STATE_SYNCHRONIZATION) + return; + break; + default: + break; + } + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, + LACP_MUX_EVENT_UNSELECTED, &sif->mux_state); +} + +static void +lacp_update_default_selected (vlib_main_t * vm, slave_if_t * sif) +{ + if ((sif->partner_admin.state & LACP_STATE_AGGREGATION) != + (sif->partner.state & LACP_STATE_AGGREGATION) || + memcmp (&sif->partner, &sif->partner_admin, + sizeof (sif->partner) - sizeof (sif->partner.state))) + { + lacp_set_port_unselected (vm, sif); + } +} + +static void +lacp_record_default (slave_if_t * sif) +{ + sif->partner = sif->partner_admin; + sif->actor.state |= LACP_STATE_DEFAULTED; +} + +static void +lacp_update_selected (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt; + + if ((lacpdu->actor.port_info.state & LACP_STATE_AGGREGATION) != + (sif->partner.state & LACP_STATE_AGGREGATION) || + memcmp (&sif->partner, 
&lacpdu->actor.port_info, + sizeof (sif->partner) - sizeof (sif->partner.state))) + { + lacp_set_port_unselected (vm, sif); + } +} + +static void +lacp_update_ntt (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt; + u8 states = LACP_STATE_LACP_ACTIVITY | LACP_STATE_LACP_TIMEOUT | + LACP_STATE_SYNCHRONIZATION | LACP_STATE_AGGREGATION; + + if ((states & lacpdu->partner.port_info.state) != + (states & sif->actor.state) + || memcmp (&sif->actor, &lacpdu->partner.port_info, + sizeof (sif->actor) - sizeof (sif->actor.state))) + { + sif->ntt = 1; + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); + } +} + +/* + * compare lacpdu partner info against sif->partner. Return 1 if they match, 0 + * otherwise. + */ +static u8 +lacp_compare_partner (slave_if_t * sif) +{ + lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt; + + if ((!memcmp (&sif->partner, &lacpdu->actor.port_info, + sizeof (sif->partner) - sizeof (sif->partner.state)) && + ((sif->actor.state & LACP_STATE_AGGREGATION) == + (lacpdu->partner.port_info.state & LACP_STATE_AGGREGATION))) || + ((lacpdu->actor.port_info.state & LACP_STATE_AGGREGATION) == 0)) + return 1; + + return 0; +} + +static void +lacp_record_pdu (slave_if_t * sif) +{ + lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt; + u8 match; + + match = lacp_compare_partner (sif); + sif->partner = lacpdu->actor.port_info; + sif->actor.state &= ~LACP_STATE_DEFAULTED; + if (match && (lacpdu->actor.port_info.state & LACP_STATE_SYNCHRONIZATION)) + sif->partner.state |= LACP_STATE_SYNCHRONIZATION; + else + sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION; +} + +static void +lacp_set_port_moved (vlib_main_t * vm, slave_if_t * sif, u8 val) +{ + sif->port_moved = val; + + if (sif->port_moved) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_PORT_MOVED, &sif->rx_state); + else if (!sif->port_enabled) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + 
LACP_RX_EVENT_PORT_DISABLED, &sif->rx_state); +} + +int +lacp_rx_action_initialize (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_set_port_unselected (vm, sif); + lacp_record_default (sif); + sif->actor.state &= ~LACP_STATE_EXPIRED; + lacp_set_port_moved (vm, sif, 0); + /* UCT */ + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_BEGIN, &sif->rx_state); + + return 0; +} + +int +lacp_rx_action_port_disabled (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION; + if (sif->port_moved) + { + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_PORT_MOVED, &sif->rx_state); + } + if (sif->port_enabled) + { + if (sif->lacp_enabled) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_LACP_ENABLED, &sif->rx_state); + else + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_LACP_DISABLED, &sif->rx_state); + } + + return 0; +} + +int +lacp_rx_action_expired (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + u8 timer_expired; + lacp_main_t *lm = &lacp_main; + + sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION; + sif->partner.state |= LACP_STATE_LACP_TIMEOUT; + lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, + LACP_PTX_EVENT_SHORT_TIMEOUT, &sif->ptx_state); + if (lacp_timer_is_running (sif->current_while_timer) && + lacp_timer_is_expired (lm->vlib_main, sif->current_while_timer)) + timer_expired = 1; + else + timer_expired = 0; + lacp_start_current_while_timer (lm->vlib_main, sif, LACP_SHORT_TIMOUT_TIME); + sif->actor.state |= LACP_STATE_EXPIRED; + if (timer_expired) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_TIMER_EXPIRED, &sif->rx_state); + if (sif->last_rx_pkt && vec_len (sif->last_rx_pkt)) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + 
LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state); + + return 0; +} + +int +lacp_rx_action_lacp_disabled (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_set_port_unselected (vm, sif); + lacp_record_default (sif); + sif->partner.state &= ~LACP_STATE_AGGREGATION; + sif->actor.state &= ~LACP_STATE_EXPIRED; + + return 0; +} + +int +lacp_rx_action_defaulted (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + + lacp_update_default_selected (vm, sif); + lacp_record_default (sif); + sif->actor.state &= ~LACP_STATE_EXPIRED; + if (sif->last_rx_pkt && vec_len (sif->last_rx_pkt)) + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state); + + return 0; +} + +static int +lacp_port_is_moved (vlib_main_t * vm, slave_if_t * sif) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif2; + lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt; + + /* *INDENT-OFF* */ + pool_foreach (sif2, bm->neighbors, { + { + if ((sif != sif2) && (sif2->rx_state == LACP_RX_STATE_PORT_DISABLED) && + !memcmp (sif2->partner.system, + lacpdu->partner.port_info.system, 6) && + (sif2->partner.port_number == lacpdu->partner.port_info.port_number)) + return 1; + } + }); + /* *INDENT-ON* */ + + return 0; +} + +int +lacp_rx_action_current (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + lacp_main_t *lm = &lacp_main; + + lacp_update_selected (vm, sif); + lacp_update_ntt (vm, sif); + lacp_record_pdu (sif); + lacp_start_current_while_timer (lm->vlib_main, sif, sif->ttl_in_seconds); + sif->actor.state &= ~LACP_STATE_EXPIRED; + if (lacp_port_is_moved (vm, sif)) + lacp_set_port_moved (vm, sif, 1); + lacp_selection_logic (vm, sif); + + return 0; +} + +static u8 * +format_rx_event (u8 * s, va_list * args) +{ + static lacp_event_struct lacp_rx_event_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + 
foreach_lacp_rx_event +#undef _ + {.str = NULL} + }; + int e = va_arg (*args, int); + lacp_event_struct *event_entry = + (lacp_event_struct *) & lacp_rx_event_array; + + if (e >= (sizeof (lacp_rx_event_array) / sizeof (*event_entry))) + s = format (s, "Bad event %d", e); + else + s = format (s, "%s", event_entry[e].str); + + return s; +} + +void +lacp_rx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition) +{ + clib_warning ("%U-RX: event %U, old state %U, new state %U", + format_vnet_sw_if_index_name, vnet_get_main (), + sif->sw_if_index, format_rx_event, + event, format_rx_sm_state, state, format_rx_sm_state, + transition->next_state); +} + +void +lacp_init_rx_machine (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, LACP_RX_EVENT_BEGIN, + &sif->rx_state); + lacp_machine_dispatch (&lacp_rx_machine, vm, sif, + LACP_RX_EVENT_LACP_ENABLED, &sif->rx_state); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/rx_machine.h b/src/plugins/lacp/rx_machine.h new file mode 100644 index 00000000000..706dbd08c67 --- /dev/null +++ b/src/plugins/lacp/rx_machine.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LACP_RX_MACHINE_H__ +#define __LACP_RX_MACHINE_H__ + +#include <stdint.h> +#include <lacp/machine.h> + +#define foreach_lacp_rx_event \ + _(0, BEGIN, "begin") \ + _(1, PORT_DISABLED, "port disabled") \ + _(2, PORT_MOVED, "port moved") \ + _(3, LACP_ENABLED, "lacp enabled") \ + _(4, LACP_DISABLED, "lacp disabled") \ + _(5, PDU_RECEIVED, "pdu received") \ + _(6, TIMER_EXPIRED, "timer expired") + +typedef enum +{ +#define _(a, b, c) LACP_RX_EVENT_##b = (a), + foreach_lacp_rx_event +#undef _ +} lacp_rx_event_t; + +#define foreach_lacp_rx_sm_state \ + _(0, INITIALIZE, "initialize") \ + _(1, PORT_DISABLED, "port disabled") \ + _(2, EXPIRED, "expired") \ + _(3, LACP_DISABLED, "lacp disabled") \ + _(4, DEFAULTED, "defaulted") \ + _(5, CURRENT, "current") + +typedef enum +{ +#define _(a, b, c) LACP_RX_STATE_##b = (a), + foreach_lacp_rx_sm_state +#undef _ +} lacp_rx_sm_state_t; + +extern lacp_machine_t lacp_rx_machine; + +int lacp_rx_action_initialize (void *, void *); +int lacp_rx_action_port_disabled (void *, void *); +int lacp_rx_action_pdu_received (void *, void *); +int lacp_rx_action_expired (void *, void *); +int lacp_rx_action_lacp_disabled (void *, void *); +int lacp_rx_action_defaulted (void *, void *); +int lacp_rx_action_current (void *, void *); +void lacp_rx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition); + +#define LACP_ACTION_INITIALIZE \ + LACP_ACTION_ROUTINE(lacp_rx_action_initialize) +#define LACP_ACTION_PORT_DISABLED \ + LACP_ACTION_ROUTINE(lacp_rx_action_port_disabled) +#define LACP_ACTION_EXPIRED \ + LACP_ACTION_ROUTINE(lacp_rx_action_expired) +#define LACP_ACTION_LACP_DISABLED \ + LACP_ACTION_ROUTINE(lacp_rx_action_lacp_disabled) +#define LACP_ACTION_DEFAULTED LACP_ACTION_ROUTINE(lacp_rx_action_defaulted) +#define LACP_ACTION_CURRENT LACP_ACTION_ROUTINE(lacp_rx_action_current) + +static inline void +lacp_start_current_while_timer (vlib_main_t * vm, slave_if_t * sif, + u8 expiration) +{ + 
sif->current_while_timer = vlib_time_now (vm) + expiration; +} + +#endif /* __LACP_RX_MACHINE_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/selection.c b/src/plugins/lacp/selection.c new file mode 100644 index 00000000000..898b6a95023 --- /dev/null +++ b/src/plugins/lacp/selection.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdint.h> +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +static void +lacp_set_port_selected (vlib_main_t * vm, slave_if_t * sif) +{ + /* Handle loopback port */ + if (!memcmp (sif->partner.system, sif->actor.system, 6) && + (sif->partner.key == sif->actor.key)) + { + sif->loopback_port = 1; + sif->actor.state &= ~LACP_STATE_AGGREGATION; + } + sif->selected = LACP_PORT_SELECTED; + + switch (sif->mux_state) + { + case LACP_MUX_STATE_DETACHED: + break; + case LACP_MUX_STATE_WAITING: + if (!sif->ready) + return; + break; + case LACP_MUX_STATE_ATTACHED: + if (!(sif->partner.state & LACP_STATE_SYNCHRONIZATION)) + return; + break; + case LACP_MUX_STATE_COLLECTING_DISTRIBUTING: + break; + default: + break; + } + lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_SELECTED, + &sif->mux_state); +} + +void +lacp_selection_logic (vlib_main_t * vm, slave_if_t * sif) +{ + slave_if_t *sif2; + bond_if_t *bif; + u32 *sw_if_index; + + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + vec_foreach (sw_if_index, bif->slaves) + { + sif2 = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif2 && (sif2->actor.state & LACP_STATE_SYNCHRONIZATION) && + (sif2->ready_n == 0)) + goto out; + } + + vec_foreach (sw_if_index, bif->slaves) + { + sif2 = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif2) + { + sif2->ready = 1; + if (sif2->selected == LACP_PORT_SELECTED) + lacp_machine_dispatch (&lacp_mux_machine, vm, sif2, + LACP_MUX_EVENT_READY, &sif2->mux_state); + } + } +out: + lacp_set_port_selected (vm, sif); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/tx_machine.c b/src/plugins/lacp/tx_machine.c new file mode 100644 index 00000000000..794b4f10d95 --- /dev/null +++ b/src/plugins/lacp/tx_machine.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 Cisco and/or its 
affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE + +#include <vnet/bonding/node.h> +#include <lacp/node.h> + +/* + * LACP State = TRANSMIT + */ +static lacp_fsm_state_t lacp_tx_state_transmit[] = { + {LACP_ACTION_TRANSMIT, LACP_TX_STATE_TRANSMIT}, // event 0 BEGIN + {LACP_ACTION_TRANSMIT, LACP_TX_STATE_TRANSMIT}, // event 1 NTT +}; + +static lacp_fsm_machine_t lacp_tx_fsm_table[] = { + {lacp_tx_state_transmit}, +}; + +lacp_machine_t lacp_tx_machine = { + lacp_tx_fsm_table, + lacp_tx_debug_func, +}; + +int +lacp_tx_action_transmit (void *p1, void *p2) +{ + vlib_main_t *vm = (vlib_main_t *) p1; + slave_if_t *sif = (slave_if_t *) p2; + f64 now = vlib_time_now (vm); + + if (!lacp_timer_is_running (sif->periodic_timer)) + return 0; + + // No more than 3 LACPDUs per fast interval + if (now <= (sif->last_lacpdu_time + 0.333)) + return 0; + + if (sif->ntt) + { + lacp_send_lacp_pdu (vm, sif); + } + sif->ntt = 0; + + return 0; +} + +static u8 * +format_tx_event (u8 * s, va_list * args) +{ + static lacp_event_struct lacp_tx_event_array[] = { +#define _(b, s, n) {.bit = b, .str = #s, }, + foreach_lacp_tx_event +#undef _ + {.str = NULL} + }; + int e = va_arg (*args, int); + lacp_event_struct *event_entry = + (lacp_event_struct *) & lacp_tx_event_array; + + if (e >= (sizeof (lacp_tx_event_array) / sizeof (*event_entry))) + s = format (s, "Bad event %d", e); + else + s = format (s, "%s", event_entry[e].str); + + return s; +} + +void 
+lacp_tx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition) +{ + clib_warning ("%U-TX: event %U, old state %U, new state %U", + format_vnet_sw_if_index_name, vnet_get_main (), + sif->sw_if_index, format_tx_event, + event, format_tx_sm_state, state, format_tx_sm_state, + transition->next_state); +} + +void +lacp_init_tx_machine (vlib_main_t * vm, slave_if_t * sif) +{ + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_BEGIN, + &sif->tx_state); + if (sif->is_passive == 0) + lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT, + &sif->tx_state); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lacp/tx_machine.h b/src/plugins/lacp/tx_machine.h new file mode 100644 index 00000000000..428c19b5edd --- /dev/null +++ b/src/plugins/lacp/tx_machine.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LACP_TX_MACHINE_H__ +#define __LACP_TX_MACHINE_H__ + +#include <stdint.h> +#include <lacp/machine.h> + +#define foreach_lacp_tx_event \ + _(0, BEGIN, "begin") \ + _(1, NTT, "Need To Transmit") + +typedef enum +{ +#define _(a, b, c) LACP_TX_EVENT_##b = (a), + foreach_lacp_tx_event +#undef _ +} lacp_tx_event_t; + +#define foreach_lacp_tx_sm_state \ + _(0, TRANSMIT, "transmit PDU") + +typedef enum +{ +#define _(a, b, c) LACP_TX_STATE_##b = (a), + foreach_lacp_tx_sm_state +#undef _ +} lacp_tx_sm_state_t; + +extern lacp_machine_t lacp_tx_machine; + +int lacp_tx_action_transmit (void *p1, void *p2); +void lacp_tx_debug_func (slave_if_t * sif, int event, int state, + lacp_fsm_state_t * transition); + +#define LACP_ACTION_TRANSMIT LACP_ACTION_ROUTINE(lacp_tx_action_transmit) + +#endif /* __LACP_TX_MACHINE_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 97e67f9d80b..019d095c40a 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -50,7 +50,7 @@ #include <vnet/policer/police.h> #include <vnet/mfib/mfib_types.h> #include <vnet/dhcp/dhcp_proxy.h> - +#include <vnet/bonding/node.h> #include "vat/json_format.h" #include <inttypes.h> @@ -1775,6 +1775,275 @@ static void vl_api_tap_delete_v2_reply_t_handler_json vam->result_ready = 1; } +static void +vl_api_bond_create_reply_t_handler (vl_api_bond_create_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->sw_if_index = ntohl (mp->sw_if_index); + vam->result_ready = 1; + } +} + +static void vl_api_bond_create_reply_t_handler_json + (vl_api_bond_create_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + 
vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + +static void +vl_api_bond_delete_reply_t_handler (vl_api_bond_delete_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_bond_delete_reply_t_handler_json + (vl_api_bond_delete_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + +static void +vl_api_bond_enslave_reply_t_handler (vl_api_bond_enslave_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_bond_enslave_reply_t_handler_json + (vl_api_bond_enslave_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + +static void +vl_api_bond_detach_slave_reply_t_handler (vl_api_bond_detach_slave_reply_t * + mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_bond_detach_slave_reply_t_handler_json + (vl_api_bond_detach_slave_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + 
vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + +static void vl_api_sw_interface_bond_details_t_handler + (vl_api_sw_interface_bond_details_t * mp) +{ + vat_main_t *vam = &vat_main; + + print (vam->ofp, + "%-16s %-12d %-12U %-13U %-14u %-14u", + mp->interface_name, ntohl (mp->sw_if_index), + format_bond_mode, mp->mode, format_bond_load_balance, mp->lb, + ntohl (mp->active_slaves), ntohl (mp->slaves)); +} + +static void vl_api_sw_interface_bond_details_t_handler_json + (vl_api_sw_interface_bond_details_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t *node = NULL; + + if (VAT_JSON_ARRAY != vam->json_tree.type) + { + ASSERT (VAT_JSON_NONE == vam->json_tree.type); + vat_json_init_array (&vam->json_tree); + } + node = vat_json_array_add (&vam->json_tree); + + vat_json_init_object (node); + vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); + vat_json_object_add_string_copy (node, "interface_name", + mp->interface_name); + vat_json_object_add_uint (node, "mode", mp->mode); + vat_json_object_add_uint (node, "load_balance", mp->lb); + vat_json_object_add_uint (node, "active_slaves", ntohl (mp->active_slaves)); + vat_json_object_add_uint (node, "slaves", ntohl (mp->slaves)); +} + +static int +api_sw_interface_bond_dump (vat_main_t * vam) +{ + vl_api_sw_interface_bond_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + print (vam->ofp, + "\n%-16s %-12s %-12s %-13s %-14s %-14s", + "interface name", "sw_if_index", "mode", "load balance", + "active slaves", "slaves"); + + /* Get list of bond interfaces */ + M (SW_INTERFACE_BOND_DUMP, mp); + S (mp); + + /* Use a control ping for synchronization */ + MPING (CONTROL_PING, mp_ping); + S (mp_ping); + + W (ret); + return ret; +} + +static void vl_api_sw_interface_slave_details_t_handler + 
(vl_api_sw_interface_slave_details_t * mp) +{ + vat_main_t *vam = &vat_main; + + print (vam->ofp, + "%-25s %-12d %-12d %d", mp->interface_name, + ntohl (mp->sw_if_index), mp->is_passive, mp->is_long_timeout); +} + +static void vl_api_sw_interface_slave_details_t_handler_json + (vl_api_sw_interface_slave_details_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t *node = NULL; + + if (VAT_JSON_ARRAY != vam->json_tree.type) + { + ASSERT (VAT_JSON_NONE == vam->json_tree.type); + vat_json_init_array (&vam->json_tree); + } + node = vat_json_array_add (&vam->json_tree); + + vat_json_init_object (node); + vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); + vat_json_object_add_string_copy (node, "interface_name", + mp->interface_name); + vat_json_object_add_uint (node, "passive", mp->is_passive); + vat_json_object_add_uint (node, "long_timeout", mp->is_long_timeout); +} + +static int +api_sw_interface_slave_dump (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_sw_interface_slave_dump_t *mp; + vl_api_control_ping_t *mp_ping; + u32 sw_if_index = ~0; + u8 sw_if_index_set = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing vpp interface name. 
"); + return -99; + } + + print (vam->ofp, + "\n%-25s %-12s %-12s %s", + "slave interface name", "sw_if_index", "passive", "long_timeout"); + + /* Get list of bond interfaces */ + M (SW_INTERFACE_SLAVE_DUMP, mp); + mp->sw_if_index = ntohl (sw_if_index); + S (mp); + + /* Use a control ping for synchronization */ + MPING (CONTROL_PING, mp_ping); + S (mp_ping); + + W (ret); + return ret; +} + static void vl_api_mpls_tunnel_add_del_reply_t_handler (vl_api_mpls_tunnel_add_del_reply_t * mp) { @@ -5466,6 +5735,12 @@ _(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details) \ _(TAP_CREATE_V2_REPLY, tap_create_v2_reply) \ _(TAP_DELETE_V2_REPLY, tap_delete_v2_reply) \ _(SW_INTERFACE_TAP_V2_DETAILS, sw_interface_tap_v2_details) \ +_(BOND_CREATE_REPLY, bond_create_reply) \ +_(BOND_DELETE_REPLY, bond_delete_reply) \ +_(BOND_ENSLAVE_REPLY, bond_enslave_reply) \ +_(BOND_DETACH_SLAVE_REPLY, bond_detach_slave_reply) \ +_(SW_INTERFACE_BOND_DETAILS, sw_interface_bond_details) \ +_(SW_INTERFACE_SLAVE_DETAILS, sw_interface_slave_details) \ _(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \ _(IP_TABLE_ADD_DEL_REPLY, ip_table_add_del_reply) \ _(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply) \ @@ -7954,6 +8229,194 @@ api_tap_delete_v2 (vat_main_t * vam) } static int +api_bond_create (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_bond_create_t *mp; + u8 mac_address[6]; + u8 custom_mac = 0; + int ret; + u8 mode; + u8 lb; + u8 mode_is_set = 0; + + memset (mac_address, 0, sizeof (mac_address)); + lb = BOND_LB_L2; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "mode %U", unformat_bond_mode, &mode)) + mode_is_set = 1; + else if (((mode == BOND_MODE_LACP) || (mode == BOND_MODE_XOR)) + && unformat (i, "lb %U", unformat_bond_load_balance, &lb)) + ; + else if (unformat (i, "hw-addr %U", unformat_ethernet_address, + mac_address)) + custom_mac = 1; + else + break; + } + + if (mode_is_set == 0) + 
{ + errmsg ("Missing bond mode. "); + return -99; + } + + /* Construct the API message */ + M (BOND_CREATE, mp); + + mp->use_custom_mac = custom_mac; + + mp->mode = mode; + mp->lb = lb; + + if (custom_mac) + clib_memcpy (mp->mac_address, mac_address, 6); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_bond_delete (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_bond_delete_t *mp; + u32 sw_if_index = ~0; + u8 sw_if_index_set = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing vpp interface name. "); + return -99; + } + + /* Construct the API message */ + M (BOND_DELETE, mp); + + mp->sw_if_index = ntohl (sw_if_index); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_bond_enslave (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_bond_enslave_t *mp; + u32 bond_sw_if_index; + int ret; + u8 is_passive; + u8 is_long_timeout; + u32 bond_sw_if_index_is_set = 0; + u32 sw_if_index; + u8 sw_if_index_is_set = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_is_set = 1; + else if (unformat (i, "bond %u", &bond_sw_if_index)) + bond_sw_if_index_is_set = 1; + else if (unformat (i, "passive %d", &is_passive)) + ; + else if (unformat (i, "long-timeout %d", &is_long_timeout)) + ; + else + break; + } + + if (bond_sw_if_index_is_set == 0) + { + errmsg ("Missing bond sw_if_index. "); + return -99; + } + if (sw_if_index_is_set == 0) + { + errmsg ("Missing slave sw_if_index. 
"); + return -99; + } + + /* Construct the API message */ + M (BOND_ENSLAVE, mp); + + mp->bond_sw_if_index = ntohl (bond_sw_if_index); + mp->sw_if_index = ntohl (sw_if_index); + mp->is_long_timeout = is_long_timeout; + mp->is_passive = is_passive; + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_bond_detach_slave (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_bond_detach_slave_t *mp; + u32 sw_if_index = ~0; + u8 sw_if_index_set = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing vpp interface name. "); + return -99; + } + + /* Construct the API message */ + M (BOND_DETACH_SLAVE, mp); + + mp->sw_if_index = ntohl (sw_if_index); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static int api_ip_table_add_del (vat_main_t * vam) { unformat_input_t *i = vam->input; @@ -22782,6 +23245,18 @@ _(tap_create_v2, \ _(tap_delete_v2, \ "<vpp-if-name> | sw_if_index <id>") \ _(sw_interface_tap_v2_dump, "") \ +_(bond_create, \ + "[hw-addr <mac-addr>] {round-robin | active-backup | " \ + "broadcast | {lacp | xor} [load-balance { l2 | l23 | l34 }]}") \ +_(bond_delete, \ + "<vpp-if-name> | sw_if_index <id>") \ +_(bond_enslave, \ + "sw_if_index <n> bond <sw_if_index> [is_passive] [is_long_timeout]") \ +_(bond_detach_slave, \ + "sw_if_index <n>") \ +_(sw_interface_bond_dump, "") \ +_(sw_interface_slave_dump, \ + "<vpp-if-name> | sw_if_index <id>") \ _(ip_table_add_del, \ "table-id <n> [ipv6]\n") \ _(ip_add_del_route, \ diff --git a/src/vnet.am b/src/vnet.am index d20124191b7..a58bdcae6f0 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -273,6 +273,21 @@ nobase_include_HEADERS += \ API_FILES += vnet/geneve/geneve.api ######################################## +# Layer 2 / Bonding +######################################## +libvnet_la_SOURCES += \ + vnet/bonding/cli.c \ + vnet/bonding/node.c \ + vnet/bonding/device.c \ + vnet/bonding/bond_api.c + +nobase_include_HEADERS += \ + vnet/bonding/node.h \ + vnet/bonding/bond.api.h + +API_FILES += vnet/bonding/bond.api + +######################################## # Layer 2 / LLDP ######################################## libvnet_la_SOURCES += \ diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api new file mode 100644 index 00000000000..e8919e14904 --- /dev/null +++ b/src/vnet/bonding/bond.api @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + + This file defines vpe control-plane API messages for + the bonding device driver +*/ + +option version = "1.0.0"; + +/** \brief Initialize a new bond interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param use_custom_mac - if set, mac_address is valid + @param mac_address - mac addr to assign to the interface if use_custom_mac is set + @param mode - mode, required (1=round-robin, 2=active-backup, 3=xor, 4=broadcast, 5=lacp) + @param lb - load balance, optional (0=l2, 1=l34, 2=l23) valid for xor and lacp modes. 
Otherwise ignored +*/ +define bond_create +{ + u32 client_index; + u32 context; + u8 use_custom_mac; + u8 mac_address[6]; + u8 mode; + u8 lb; +}; + +/** \brief Reply for bond create request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new bond interface +*/ +define bond_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete bond interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of the bond interface to delete +*/ +autoreply define bond_delete +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Enslave an interface to a bond interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - slave sw_if_index + @param bond_sw_if_index - bond sw_if_index + @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker + @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout +*/ +define bond_enslave +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 bond_sw_if_index; + u8 is_passive; + u8 is_long_timeout; +}; + +/** \brief Reply for bond enslave request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define bond_enslave_reply +{ + u32 context; + i32 retval; +}; + +/** \brief bond detach slave + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of slave interface +*/ +autoreply define bond_detach_slave +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Dump bond interfaces request */ +define sw_interface_bond_dump +{ + u32 client_index; + u32 context; 
+}; + +/** \brief Reply for bond dump request + @param sw_if_index - software index of bond interface + @param interface_name - name of interface + @param mode - bonding mode + @param lb - load balance algo + @param active_slaves - active slaves count + @param slaves - config slave count +*/ +define sw_interface_bond_details +{ + u32 context; + u32 sw_if_index; + u8 interface_name[64]; + u8 mode; + u8 lb; + u32 active_slaves; + u32 slaves; +}; + +/** \brief bond slave dump + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of bond interface +*/ +define sw_interface_slave_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Reply for slave dump request + @param sw_if_index - software index of slave interface + @param interface_name - name of interface + @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker + @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout +*/ +define sw_interface_slave_details +{ + u32 context; + u32 sw_if_index; + u8 interface_name[64]; + u8 is_passive; + u8 is_long_timeout; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c new file mode 100644 index 00000000000..02536e966a1 --- /dev/null +++ b/src/vnet/bonding/bond_api.c @@ -0,0 +1,328 @@ +/* + *------------------------------------------------------------------ + * bond_api.c - vnet bonding device driver API support + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> +#include <vnet/bonding/node.h> + +#define foreach_bond_api_msg \ +_(BOND_CREATE, bond_create) \ +_(BOND_DELETE, bond_delete) \ +_(BOND_ENSLAVE, bond_enslave) \ +_(BOND_DETACH_SLAVE, bond_detach_slave) \ +_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\ +_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump) + +static void +bond_send_sw_interface_event_deleted (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index) +{ + vl_api_sw_interface_event_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); + mp->sw_if_index = ntohl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp) +{ + 
vlib_main_t *vm = vlib_get_main (); + int rv; + vpe_api_main_t *vam = &vpe_api_main; + vl_api_bond_delete_reply_t *rmp; + unix_shared_memory_queue_t *q; + u32 sw_if_index = ntohl (mp->sw_if_index); + + rv = bond_delete_if (vm, sw_if_index); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_BOND_DELETE_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + + if (!rv) + bond_send_sw_interface_event_deleted (vam, q, sw_if_index); +} + +static void +vl_api_bond_create_t_handler (vl_api_bond_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_bond_create_reply_t *rmp; + unix_shared_memory_queue_t *q; + bond_create_if_args_t _a, *ap = &_a; + + memset (ap, 0, sizeof (*ap)); + + if (mp->use_custom_mac) + { + clib_memcpy (ap->hw_addr, mp->mac_address, 6); + ap->hw_addr_set = 1; + } + + ap->mode = mp->mode; + ap->lb = mp->lb; + bond_create_if (vm, ap); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + if (ap->rv != 0) + return; + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_BOND_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (ap->rv); + rmp->sw_if_index = ntohl (ap->sw_if_index); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_bond_enslave_reply_t *rmp; + unix_shared_memory_queue_t *q; + bond_enslave_args_t _a, *ap = &_a; + + memset (ap, 0, sizeof (*ap)); + + ap->group = ntohl (mp->bond_sw_if_index); + ap->slave = ntohl (mp->sw_if_index); + ap->is_passive = mp->is_passive; + ap->is_long_timeout = mp->is_long_timeout; + + bond_enslave (vm, ap); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = 
ntohs (VL_API_BOND_ENSLAVE_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (ap->rv); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_bond_detach_slave_reply_t *rmp; + unix_shared_memory_queue_t *q; + bond_detach_slave_args_t _a, *ap = &_a; + + memset (ap, 0, sizeof (*ap)); + + ap->slave = ntohl (mp->sw_if_index); + bond_detach_slave (vm, ap); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_BOND_DETACH_SLAVE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (ap->rv); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +bond_send_sw_interface_details (vpe_api_main_t * am, + vl_api_registration_t * reg, + bond_interface_details_t * bond_if, + u32 context) +{ + vl_api_sw_interface_bond_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_BOND_DETAILS); + mp->sw_if_index = htonl (bond_if->sw_if_index); + clib_memcpy (mp->interface_name, bond_if->interface_name, + MIN (ARRAY_LEN (mp->interface_name) - 1, + strlen ((const char *) bond_if->interface_name))); + mp->mode = bond_if->mode; + mp->lb = bond_if->lb; + mp->active_slaves = htonl (bond_if->active_slaves); + mp->slaves = htonl (bond_if->slaves); + + mp->context = context; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_sw_interface_bond_dump_t_handler (vl_api_sw_interface_bond_dump_t * mp) +{ + int rv; + vpe_api_main_t *am = &vpe_api_main; + vl_api_registration_t *reg; + bond_interface_details_t *bondifs = NULL; + bond_interface_details_t *bond_if = NULL; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rv = bond_dump_ifs (&bondifs); + if (rv) + return; + + vec_foreach (bond_if, bondifs) + { + 
bond_send_sw_interface_details (am, reg, bond_if, mp->context); + } + + vec_free (bondifs); +} + +static void +bond_send_sw_interface_slave_details (vpe_api_main_t * am, + vl_api_registration_t * reg, + slave_interface_details_t * slave_if, + u32 context) +{ + vl_api_sw_interface_slave_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_SLAVE_DETAILS); + mp->sw_if_index = htonl (slave_if->sw_if_index); + clib_memcpy (mp->interface_name, slave_if->interface_name, + MIN (ARRAY_LEN (mp->interface_name) - 1, + strlen ((const char *) slave_if->interface_name))); + mp->is_passive = slave_if->is_passive; + mp->is_long_timeout = slave_if->is_long_timeout; + + mp->context = context; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_sw_interface_slave_dump_t_handler (vl_api_sw_interface_slave_dump_t * + mp) +{ + int rv; + vpe_api_main_t *am = &vpe_api_main; + vl_api_registration_t *reg; + slave_interface_details_t *slaveifs = NULL; + slave_interface_details_t *slave_if = NULL; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rv = bond_dump_slave_ifs (&slaveifs, ntohl (mp->sw_if_index)); + if (rv) + return; + + vec_foreach (slave_if, slaveifs) + { + bond_send_sw_interface_slave_details (am, reg, slave_if, mp->context); + } + + vec_free (slaveifs); +} + +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +bond_setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_bond; +#undef _ +} + +static clib_error_t * +bond_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_bond_api_msg; +#undef _ 
+ + /* + * Set up the (msg_name, crc, message-id) table + */ + bond_setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (bond_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c new file mode 100644 index 00000000000..b2d66f9f1c8 --- /dev/null +++ b/src/vnet/bonding/cli.c @@ -0,0 +1,706 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/bonding/node.h> + +void +bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) +{ + bond_if_t *bif; + int i; + uword p; + + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + vec_foreach_index (i, bif->active_slaves) + { + p = *vec_elt_at_index (bif->active_slaves, i); + if (p == sif->sw_if_index) + { + vec_del1 (bif->active_slaves, i); + hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index); + break; + } + } +} + +void +bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) +{ + bond_if_t *bif; + + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + if (!hash_get (bif->active_slave_by_sw_if_index, sif->sw_if_index)) + { + hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index, + sif->sw_if_index); + vec_add1 (bif->active_slaves, sif->sw_if_index); + } +} + +int +bond_dump_ifs (bond_interface_details_t ** out_bondifs) +{ + vnet_main_t *vnm = vnet_get_main (); + bond_main_t *bm = &bond_main; + bond_if_t *bif; + vnet_hw_interface_t *hi; + bond_interface_details_t *r_bondifs = NULL; + bond_interface_details_t *bondif = NULL; + + /* *INDENT-OFF* */ + pool_foreach (bif, bm->interfaces, + vec_add2(r_bondifs, bondif, 1); + memset (bondif, 0, sizeof (*bondif)); + bondif->sw_if_index = bif->sw_if_index; + hi = vnet_get_hw_interface (vnm, bif->hw_if_index); + clib_memcpy(bondif->interface_name, hi->name, + MIN (ARRAY_LEN (bondif->interface_name) - 1, + strlen ((const char *) hi->name))); + bondif->mode = bif->mode; + bondif->lb = bif->lb; + bondif->active_slaves = vec_len (bif->active_slaves); + bondif->slaves = vec_len (bif->slaves); + ); + /* *INDENT-ON* */ + + *out_bondifs = r_bondifs; + + return 0; +} + +int +bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, + u32 
bond_sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + bond_if_t *bif; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *sw; + slave_interface_details_t *r_slaveifs = NULL; + slave_interface_details_t *slaveif = NULL; + u32 *sw_if_index = NULL; + slave_if_t *sif; + + bif = bond_get_master_by_sw_if_index (bond_sw_if_index); + if (!bif) + return 1; + + vec_foreach (sw_if_index, bif->slaves) + { + vec_add2 (r_slaveifs, slaveif, 1); + memset (slaveif, 0, sizeof (*slaveif)); + sif = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif) + { + sw = vnet_get_sw_interface (vnm, sif->sw_if_index); + hi = vnet_get_hw_interface (vnm, sw->hw_if_index); + clib_memcpy (slaveif->interface_name, hi->name, + MIN (ARRAY_LEN (slaveif->interface_name) - 1, + strlen ((const char *) hi->name))); + slaveif->sw_if_index = sif->sw_if_index; + slaveif->is_passive = sif->is_passive; + slaveif->is_long_timeout = sif->is_long_timeout; + } + } + *out_slaveifs = r_slaveifs; + + return 0; +} + +static void +bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) +{ + bond_main_t *bm = &bond_main; + vnet_main_t *vnm = vnet_get_main (); + int i; + vnet_hw_interface_t *hw; + + bif->port_number_bitmap = + clib_bitmap_set (bif->port_number_bitmap, + ntohs (sif->actor_admin.port_number) - 1, 0); + hash_unset (bm->neighbor_by_sw_if_index, sif->sw_if_index); + vec_free (sif->last_marker_pkt); + vec_free (sif->last_rx_pkt); + vec_foreach_index (i, bif->slaves) + { + uword p = *vec_elt_at_index (bif->slaves, i); + if (p == sif->sw_if_index) + { + vec_del1 (bif->slaves, i); + break; + } + } + + bond_disable_collecting_distributing (vm, sif); + + /* Put back the old mac */ + hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); + vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index, + sif->persistent_hw_address); + + pool_put (bm->neighbors, sif); + + if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable) + (*bm->lacp_enable_disable) (vm, bif, sif, 0); +} + +int 
+bond_delete_if (vlib_main_t * vm, u32 sw_if_index) +{ + bond_main_t *bm = &bond_main; + vnet_main_t *vnm = vnet_get_main (); + bond_if_t *bif; + slave_if_t *sif; + vnet_hw_interface_t *hw; + u32 *sif_sw_if_index; + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || bond_dev_class.index != hw->dev_class_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + bif = bond_get_master_by_dev_instance (hw->dev_instance); + + vec_foreach (sif_sw_if_index, bif->slaves) + { + sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index); + if (sif) + bond_delete_neighbor (vm, bif, sif); + } + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, bif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, bif->sw_if_index, 0); + + ethernet_delete_interface (vnm, bif->hw_if_index); + + clib_bitmap_free (bif->port_number_bitmap); + hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index); + memset (bif, 0, sizeof (*bif)); + pool_put (bm->interfaces, bif); + + return 0; +} + +void +bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args) +{ + bond_main_t *bm = &bond_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw; + bond_if_t *bif; + + if ((args->mode == BOND_MODE_LACP) && bm->lacp_plugin_loaded == 0) + { + args->rv = VNET_API_ERROR_FEATURE_DISABLED; + args->error = clib_error_return (0, "LACP plugin is not loaded"); + return; + } + if (args->mode > BOND_MODE_LACP || args->mode < BOND_MODE_ROUND_ROBIN) + { + args->rv = VNET_API_ERROR_INVALID_ARGUMENT; + args->error = clib_error_return (0, "Invalid mode"); + return; + } + if (args->lb > BOND_LB_L23) + { + args->rv = VNET_API_ERROR_INVALID_ARGUMENT; + args->error = clib_error_return (0, "Invalid load-balance"); + return; + } + pool_get (bm->interfaces, bif); + memset (bif, 0, sizeof (*bif)); + bif->dev_instance = bif - bm->interfaces; + bif->lb = args->lb; + bif->mode = args->mode; + + // Special load-balance mode used for rr and bc + if (bif->mode == BOND_MODE_ROUND_ROBIN) 
+ bif->lb = BOND_LB_RR; + else if (bif->mode == BOND_MODE_BROADCAST) + bif->lb = BOND_LB_BC; + + bif->use_custom_mac = args->hw_addr_set; + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 0xfe; + } + memcpy (bif->hw_address, args->hw_addr, 6); + args->error = ethernet_register_interface + (vnm, bond_dev_class.index, bif - bm->interfaces /* device instance */ , + bif->hw_address /* ethernet address */ , + &bif->hw_if_index, 0 /* flag change */ ); + + if (args->error) + { + args->rv = VNET_API_ERROR_INVALID_REGISTRATION; + pool_put (bm->interfaces, bif); + return; + } + + sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index); + bif->sw_if_index = sw->sw_if_index; + bif->group = bif->sw_if_index; + + vnet_hw_interface_set_flags (vnm, bif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + hash_set (bm->bond_by_sw_if_index, bif->sw_if_index, bif->dev_instance); + + // for return + args->sw_if_index = bif->sw_if_index; +} + +static clib_error_t * +bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + bond_create_if_args_t args = { 0 }; + u8 mode_is_set = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + args.mode = -1; + args.lb = BOND_LB_L2; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "mode %U", unformat_bond_mode, &args.mode)) + mode_is_set = 1; + else if (((args.mode == BOND_MODE_LACP) || (args.mode == BOND_MODE_XOR)) + && unformat (line_input, "load-balance %U", + unformat_bond_load_balance, &args.lb)) + ; + else if (unformat (line_input, "hw-addr %U", + unformat_ethernet_address, args.hw_addr)) + args.hw_addr_set = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (mode_is_set == 0) + return clib_error_return (0, "Missing bond mode"); + + bond_create_if (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bond_create_command, static) = { + .path = "create bond", + .short_help = "create bond mode {round-robin | active-backup | broadcast | " + "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>]", + .function = bond_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_main_t *vnm = vnet_get_main (); + int rv; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing <interface>"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + rv = bond_delete_if (vm, sw_if_index); + if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX) + return clib_error_return (0, "not a bond interface"); + else if (rv != 0) + return clib_error_return (0, "error on deleting bond interface"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bond_delete__command, static) = +{ + .path = "delete bond", + .short_help = "delete bond {<interface> | sw_if_index <sw_idx>}", + .function = bond_delete_command_fn, +}; +/* *INDENT-ON* */ + +void +bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) +{ + bond_main_t *bm = &bond_main; + vnet_main_t *vnm = vnet_get_main (); + bond_if_t *bif; + slave_if_t *sif; + vnet_interface_main_t *im = &vnm->interface_main; + vnet_hw_interface_t *hw, *hw2; + vnet_sw_interface_t *sw; + + bif = bond_get_master_by_sw_if_index (args->group); + if (!bif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "bond interface not found"); + return; + } + // make sure the interface is not already enslaved + if (bond_get_slave_by_sw_if_index (args->slave)) + { + args->rv = VNET_API_ERROR_VALUE_EXIST; + args->error = clib_error_return (0, "interface was already enslaved"); + return; + } + hw = vnet_get_sup_hw_interface (vnm, args->slave); + if (hw->dev_class_index == bond_dev_class.index) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (0, 
"bond interface cannot be enslaved"); + return; + } + pool_get (bm->neighbors, sif); + memset (sif, 0, sizeof (*sif)); + clib_spinlock_init (&sif->lockp); + sw = pool_elt_at_index (im->sw_interfaces, args->slave); + sif->port_enabled = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + sif->sw_if_index = sw->sw_if_index; + sif->hw_if_index = sw->hw_if_index; + sif->packet_template_index = (u8) ~ 0; + sif->is_passive = args->is_passive; + sif->group = args->group; + sif->bif_dev_instance = bif->dev_instance; + sif->mode = bif->mode; + + sif->is_long_timeout = args->is_long_timeout; + if (args->is_long_timeout) + sif->ttl_in_seconds = LACP_LONG_TIMOUT_TIME; + else + sif->ttl_in_seconds = LACP_SHORT_TIMOUT_TIME; + + hash_set (bm->neighbor_by_sw_if_index, sif->sw_if_index, + sif - bm->neighbors); + vec_add1 (bif->slaves, sif->sw_if_index); + + hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); + /* Save the old mac */ + memcpy (sif->persistent_hw_address, hw->hw_address, 6); + if (bif->use_custom_mac) + { + vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index, + bif->hw_address); + } + else + { + // bond interface gets the mac address from the first slave + if (vec_len (bif->slaves) == 1) + { + memcpy (bif->hw_address, hw->hw_address, 6); + hw2 = vnet_get_sup_hw_interface (vnm, bif->sw_if_index); + vnet_hw_interface_change_mac_address (vnm, hw2->hw_if_index, + hw->hw_address); + } + else + { + // subsequent slaves gets the mac address of the bond interface + vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index, + bif->hw_address); + } + } + + if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable) + { + (*bm->lacp_enable_disable) (vm, bif, sif, 1); + } + else + { + bond_enable_collecting_distributing (vm, sif); + } + + args->rv = vnet_feature_enable_disable ("device-input", "bond-input", + hw->hw_if_index, 1, 0, 0); + + if (args->rv) + { + args->error = + clib_error_return (0, + "Error encountered on input feature arc enable"); + } +} + +static 
clib_error_t * +enslave_interface_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bond_enslave_args_t args = { 0 }; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + args.slave = ~0; + args.group = ~0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "interface %U", + unformat_vnet_sw_interface, vnm, &args.slave)) + ; + else if (unformat (line_input, "to %U", unformat_vnet_sw_interface, vnm, + &args.group)) + ; + else if (unformat (line_input, "passive")) + args.is_passive = 1; + else if (unformat (line_input, "long-timeout")) + args.is_long_timeout = 1; + else + { + args.error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + unformat_free (line_input); + + if (args.error) + return args.error; + if (args.group == ~0) + return clib_error_return (0, "Missing bond interface"); + if (args.slave == ~0) + return clib_error_return (0, "please specify valid interface name"); + + bond_enslave (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (enslave_interface_command, static) = { + .path = "enslave", + .short_help = "enslave interface <interface> to <BondEthernetx> [passive] [long-timeout]", + .function = enslave_interface_command_fn, +}; +/* *INDENT-ON* */ + +void +bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args) +{ + bond_if_t *bif; + slave_if_t *sif; + + sif = bond_get_slave_by_sw_if_index (args->slave); + if (!sif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "interface was not enslaved"); + return; + } + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + bond_delete_neighbor (vm, bif, sif); +} + +static clib_error_t 
* +detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bond_detach_slave_args_t args = { 0 }; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + args.slave = ~0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "interface %U", + unformat_vnet_sw_interface, vnm, &args.slave)) + ; + else + { + args.error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + unformat_free (line_input); + + if (args.error) + return args.error; + if (args.slave == ~0) + return clib_error_return (0, "please specify valid interface name"); + + bond_detach_slave (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (detach_interface_command, static) = { + .path = "detach", + .short_help = "detach interface <interface>", + .function = detach_interface_command_fn, +}; +/* *INDENT-ON* */ + +static void +show_bond (vlib_main_t * vm) +{ + bond_main_t *bm = &bond_main; + bond_if_t *bif; + + vlib_cli_output (vm, "%-16s %-12s %-12s %-13s %-14s %s", + "interface name", "sw_if_index", "mode", + "load balance", "active slaves", "slaves"); + + /* *INDENT-OFF* */ + pool_foreach (bif, bm->interfaces, + ({ + vlib_cli_output (vm, "%-16U %-12d %-12U %-13U %-14u %u", + format_bond_interface_name, bif->dev_instance, + bif->sw_if_index, format_bond_mode, bif->mode, + format_bond_load_balance, bif->lb, + vec_len (bif->active_slaves), vec_len (bif->slaves)); + })); + /* *INDENT-ON* */ +} + +static void +show_bond_details (vlib_main_t * vm) +{ + bond_main_t *bm = &bond_main; + bond_if_t *bif; + u32 *sw_if_index; + + /* *INDENT-OFF* */ + pool_foreach (bif, bm->interfaces, + ({ + vlib_cli_output (vm, "%U", format_bond_interface_name, 
bif->dev_instance); + vlib_cli_output (vm, " mode: %U", + format_bond_mode, bif->mode); + vlib_cli_output (vm, " load balance: %U", + format_bond_load_balance, bif->lb); + if (bif->mode == BOND_MODE_ROUND_ROBIN) + vlib_cli_output (vm, " last xmit slave index: %u", + bif->lb_rr_last_index); + vlib_cli_output (vm, " number of active slaves: %d", + vec_len (bif->active_slaves)); + vec_foreach (sw_if_index, bif->active_slaves) + { + vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, + vnet_get_main (), *sw_if_index); + } + vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves)); + vec_foreach (sw_if_index, bif->slaves) + { + vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, + vnet_get_main (), *sw_if_index); + } + vlib_cli_output (vm, " device instance: %d", bif->dev_instance); + vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index); + vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index); + })); + /* *INDENT-ON* */ +} + +static clib_error_t * +show_bond_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 details = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "details")) + details = 1; + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + + if (details) + show_bond_details (vm); + else + show_bond (vm); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_bond_command, static) = { + .path = "show bond", + .short_help = "show bond [details]", + .function = show_bond_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +bond_cli_init (vlib_main_t * vm) +{ + bond_main_t *bm = &bond_main; + + bm->vlib_main = vm; + bm->vnet_main = vnet_get_main (); + bm->neighbor_by_sw_if_index = hash_create (0, sizeof (uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (bond_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c new file mode 100644 index 00000000000..8f9b3a95591 --- /dev/null +++ b/src/vnet/bonding/device.c @@ -0,0 +1,610 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/ip6_hop_by_hop_packet.h> +#include <vnet/bonding/node.h> + +#define foreach_bond_tx_error \ + _(NONE, "no error") \ + _(IF_DOWN, "interface down") \ + _(NO_SLAVE, "no slave") + +typedef enum +{ +#define _(f,s) BOND_TX_ERROR_##f, + foreach_bond_tx_error +#undef _ + BOND_TX_N_ERROR, +} bond_tx_error_t; + +static char *bond_tx_error_strings[] = { +#define _(n,s) s, + foreach_bond_tx_error +#undef _ +}; + +static u8 * +format_bond_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *); + vnet_hw_interface_t *hw, *hw1; + vnet_main_t *vnm = vnet_get_main (); + + hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index); + hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index); + s = format (s, "src %U, dst %U, %s 
-> %s", + format_ethernet_address, t->ethernet.src_address, + format_ethernet_address, t->ethernet.dst_address, + hw->name, hw1->name); + + return s; +} + +u8 * +format_bond_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + bond_main_t *bm = &bond_main; + bond_if_t *bif = pool_elt_at_index (bm->interfaces, dev_instance); + + s = format (s, "BondEthernet%lu", bif->dev_instance); + + return s; +} + +static __clib_unused clib_error_t * +bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +static clib_error_t * +bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + bond_main_t *bm = &bond_main; + bond_if_t *bif = pool_elt_at_index (bm->interfaces, hif->dev_instance); + + bif->admin_up = is_up; + if (is_up && vec_len (bif->active_slaves)) + vnet_hw_interface_set_flags (vnm, bif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + return 0; +} + +static inline u32 +bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + vnet_main_t *vnm = vnet_get_main (); + vlib_buffer_t *c0; + int i; + u32 *to_next = 0; + u32 sw_if_index; + vlib_frame_t *f; + + + for (i = 1; i < vec_len (bif->active_slaves); i++) + { + sw_if_index = *vec_elt_at_index (bif->active_slaves, i); + f = vnet_get_frame_to_sw_interface (vnm, sw_if_index); + to_next = vlib_frame_vector_args (f); + to_next += f->n_vectors; + c0 = vlib_buffer_copy (vm, b0); + if (PREDICT_TRUE (c0 != 0)) + { + vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index; + to_next[0] = vlib_get_buffer_index (vm, c0); + f->n_vectors++; + vnet_put_frame_to_sw_interface (vnm, sw_if_index, f); + } + } + + return 0; +} + +static inline u32 +bond_load_balance_l2 (vlib_main_t * vm, 
vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + /* L2 hash: fold source and destination MAC addresses and pick an index into active_slaves. */ + ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + u32 a = 0, b = 0, c = 0, t1, t2; + u16 t11, t22; + + /* memcpy is used to read the 6-byte MACs as a 4-byte and a 2-byte piece. */ + memcpy (&t1, eth->src_address, sizeof (t1)); + memcpy (&t11, &eth->src_address[4], sizeof (t11)); + a = t1 ^ t11; + + memcpy (&t2, eth->dst_address, sizeof (t2)); + memcpy (&t22, &eth->dst_address[4], sizeof (t22)); + b = t2 ^ t22; + + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 (a, b, c); + + return c % vec_len (bif->active_slaves); +} + +/* + * Return a pointer to the ethertype that follows any VLAN tags. + * Handles an untagged frame, one tag, or two stacked tags when the first tag's type is ETHERNET_TYPE_VLAN. + */ +static inline u16 * +bond_locate_ethertype (ethernet_header_t * eth) +{ + u16 *ethertype_p; + ethernet_vlan_header_t *vlan; + + if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type))) + { + ethertype_p = &eth->type; + } + else + { + vlan = (void *) (eth + 1); + ethertype_p = &vlan->type; + if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN)) + { + vlan++; + ethertype_p = &vlan->type; + } + } + return ethertype_p; +} + +static inline u32 +bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + u8 ip_version; + ip4_header_t *ip4; + u16 ethertype, *ethertype_p; + + ethertype_p = bond_locate_ethertype (eth); + ethertype = *ethertype_p; + + /* Anything that is not IPv4/IPv6 is hashed on the MAC addresses only. */ + if ((ethertype != htons (ETHERNET_TYPE_IP4)) && + (ethertype != htons (ETHERNET_TYPE_IP6))) + return (bond_load_balance_l2 (vm, node, bif, b0)); + + ip4 = (ip4_header_t *) (ethertype_p + 1); + ip_version = (ip4->ip_version_and_header_length >> 4); + + if (ip_version == 0x4) + { + u16 t11, t22; + u32 a = 0, b = 0, c = 0, t1, t2; + + memcpy (&t1, eth->src_address, sizeof (t1)); + memcpy (&t11, &eth->src_address[4], sizeof (t11)); + a = t1 ^ t11; + + memcpy (&t2, eth->dst_address, sizeof (t2)); + memcpy (&t22, &eth->dst_address[4], sizeof (t22)); + b = t2 ^ t22; + + c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; + + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 
(a, b, c); + + return c % vec_len (bif->active_slaves); + } + else if (ip_version == 0x6) + { + u64 a, b, c; + u64 t1 = 0, t2 = 0; + /* The IP header starts right after the located ethertype (same place the version nibble was read from), so VLAN-tagged frames are handled too. */ + ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1); + + memcpy (&t1, eth->src_address, sizeof (eth->src_address)); + memcpy (&t2, eth->dst_address, sizeof (eth->dst_address)); + a = t1 ^ t2; + + b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); + c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); + + hash_mix64 (a, b, c); + return c % vec_len (bif->active_slaves); + } + /* Ethertype said IP but the version nibble is neither 4 nor 6: fall back to the L2 hash. */ + return (bond_load_balance_l2 (vm, node, bif, b0)); +} + +/* + * L3/L4 hash: IP addresses plus TCP/UDP ports when present. + * Returns an index into bif->active_slaves; non-IP frames use the L2 hash. + */ +static inline u32 +bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + u8 ip_version; + uword is_tcp_udp = 0; + ip4_header_t *ip4; + u16 ethertype, *ethertype_p; + + ethertype_p = bond_locate_ethertype (eth); + ethertype = *ethertype_p; + + if ((ethertype != htons (ETHERNET_TYPE_IP4)) && + (ethertype != htons (ETHERNET_TYPE_IP6))) + return (bond_load_balance_l2 (vm, node, bif, b0)); + + ip4 = (ip4_header_t *) (ethertype_p + 1); + ip_version = (ip4->ip_version_and_header_length >> 4); + + if (ip_version == 0x4) + { + u32 a = 0, b = 0, c = 0, t1, t2; + tcp_header_t *tcp = (void *) (ip4 + 1); + is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) || + (ip4->protocol == IP_PROTOCOL_UDP); + + a = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; + + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? 
tcp->dst : 0; + b = t1 + (t2 << 16); + + hash_v3_mix32 (a, b, c); + hash_v3_finalize32 (a, b, c); + + return c % vec_len (bif->active_slaves); + } + else if (ip_version == 0x6) + { + u64 a, b, c; + u64 t1, t2; + /* The IP header starts right after the located ethertype (same place the version nibble was read from), so VLAN-tagged frames are handled too. */ + ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1); + tcp_header_t *tcp = (void *) (ip6 + 1); + + if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) || + (ip6->protocol == IP_PROTOCOL_UDP))) + { + is_tcp_udp = 1; + tcp = (void *) (ip6 + 1); + } + else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + /* Only a single hop-by-hop extension header directly followed by TCP/UDP is looked through. */ + ip6_hop_by_hop_header_t *hbh = + (ip6_hop_by_hop_header_t *) (ip6 + 1); + if ((hbh->protocol == IP_PROTOCOL_TCP) + || (hbh->protocol == IP_PROTOCOL_UDP)) + { + is_tcp_udp = 1; + tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); + } + } + a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); + b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); + + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; + c = (t2 << 16) | t1; + hash_mix64 (a, b, c); + + return c % vec_len (bif->active_slaves); + } + + /* Ethertype said IP but the version nibble is neither 4 nor 6: fall back to the L2 hash. */ + return (bond_load_balance_l2 (vm, node, bif, b0)); +} + +/* Round robin: advance the per-bond cursor and wrap it at the number of active slaves. */ +static inline u32 +bond_load_balance_round_robin (vlib_main_t * vm, + vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + bif->lb_rr_last_index++; + bif->lb_rr_last_index %= vec_len (bif->active_slaves); + + return bif->lb_rr_last_index; +} + +static inline u32 +bond_load_balance_active_backup (vlib_main_t * vm, + vlib_node_runtime_t * node, + bond_if_t * bif, vlib_buffer_t * b0) +{ + /* First interface is the active, the rest is backup */ + return 0; +} + +/* One entry per foreach_bond_lb_algo item; bond_tx_fn indexes this table with bif->lb. */ +static bond_load_balance_func_t bond_load_balance_table[] = { +#define _(v,f,s, p) { bond_load_balance_##p }, + foreach_bond_lb_algo +#undef _ +}; + +static uword +bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; + bond_main_t *bm = &bond_main; + bond_if_t *bif = 
pool_elt_at_index (bm->interfaces, rund->dev_instance); + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left_from; + ethernet_header_t *eth; + u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; + u32 port, port1, port2, port3; + u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; + bond_packet_trace_t *t0; + uword n_trace = vlib_get_trace_count (vm, node); + u16 thread_index = vlib_get_thread_index (); + vnet_main_t *vnm = vnet_get_main (); + u32 *to_next, *to_next1, *to_next2, *to_next3; + u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3; + vlib_frame_t *f, *f1, *f2, *f3; + + if (PREDICT_FALSE (bif->admin_up == 0)) + { + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_DROP, + thread_index, bif->sw_if_index, + frame->n_vectors); + vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN, + frame->n_vectors); + return frame->n_vectors; + } + + if (PREDICT_FALSE (vec_len (bif->active_slaves) == 0)) + { + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + vlib_increment_combined_counter + (vnet_main.interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, thread_index, bif->sw_if_index, + frame->n_vectors, b0->current_length); + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_DROP, + thread_index, bif->sw_if_index, + frame->n_vectors); + vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE, + frame->n_vectors); + return frame->n_vectors; + } + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + while (n_left_from >= 8) + { + // Prefetch next iteration + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); 
+ p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); + vlib_prefetch_buffer_header (p6, STORE); + vlib_prefetch_buffer_header (p7, STORE); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX]; + + port = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0); + port1 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b1); + port2 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b2); + port3 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b3); + + sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1); + sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2); + sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = sif_if_index1; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3; + + f = vnet_get_frame_to_sw_interface (vnm, sif_if_index); + f1 = vnet_get_frame_to_sw_interface (vnm, sif_if_index1); + f2 = 
vnet_get_frame_to_sw_interface (vnm, sif_if_index2); + f3 = vnet_get_frame_to_sw_interface (vnm, sif_if_index3); + + to_next = vlib_frame_vector_args (f); + to_next1 = vlib_frame_vector_args (f1); + to_next2 = vlib_frame_vector_args (f2); + to_next3 = vlib_frame_vector_args (f3); + + to_next += f->n_vectors; + to_next1 += f1->n_vectors; + to_next2 += f2->n_vectors; + to_next3 += f3->n_vectors; + + to_next[0] = vlib_get_buffer_index (vm, b0); + to_next1[0] = vlib_get_buffer_index (vm, b1); + to_next2[0] = vlib_get_buffer_index (vm, b2); + to_next3[0] = vlib_get_buffer_index (vm, b3); + + f->n_vectors++; + f1->n_vectors++; + f2->n_vectors++; + f3->n_vectors++; + + vnet_put_frame_to_sw_interface (vnm, sif_if_index, f); + vnet_put_frame_to_sw_interface (vnm, sif_if_index1, f1); + vnet_put_frame_to_sw_interface (vnm, sif_if_index2, f2); + vnet_put_frame_to_sw_interface (vnm, sif_if_index3, f3); + + if (PREDICT_FALSE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index; + t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next1, b1, 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b1, sizeof (*t0)); + eth = (ethernet_header_t *) vlib_buffer_get_current (b1); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index1; + t0->bond_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next2, b2, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b2, sizeof (*t0)); + eth = (ethernet_header_t *) vlib_buffer_get_current (b2); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index2; + 
t0->bond_sw_if_index = + vnet_buffer (b2)->sw_if_index[VLIB_TX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next3, b3, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b3, sizeof (*t0)); + eth = + (ethernet_header_t *) vlib_buffer_get_current (b3); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index3; + t0->bond_sw_if_index = + vnet_buffer (b3)->sw_if_index[VLIB_TX]; + } + } + } + } + + from += 4; + n_left_from -= 4; + } + + while (n_left_from > 0) + { + // Prefetch next iteration + if (n_left_from > 1) + { + vlib_buffer_t *p2; + + p2 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + + port = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0); + sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index; + f = vnet_get_frame_to_sw_interface (vnm, sif_if_index); + to_next = vlib_frame_vector_args (f); + to_next += f->n_vectors; + + to_next[0] = vlib_get_buffer_index (vm, b0); + f->n_vectors++; + vnet_put_frame_to_sw_interface (vnm, sif_if_index, f); + + if (PREDICT_FALSE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index; + t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + } + + from += 1; + n_left_from -= 1; + } + + vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_TX, thread_index, + bif->sw_if_index, frame->n_vectors); + + return frame->n_vectors; +} + +/* 
*INDENT-OFF* */ +VNET_DEVICE_CLASS (bond_dev_class) = { + .name = "bond", + .tx_function = bond_tx_fn, + .tx_function_n_errors = BOND_TX_N_ERROR, + .tx_function_error_strings = bond_tx_error_strings, + .format_device_name = format_bond_interface_name, + .admin_up_down_function = bond_interface_admin_up_down, + .subif_add_del_function = bond_subif_add_del_function, + .format_tx_trace = format_bond_tx_trace, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (bond_dev_class, bond_tx_fn) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c new file mode 100644 index 00000000000..4deec829195 --- /dev/null +++ b/src/vnet/bonding/node.c @@ -0,0 +1,509 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <vnet/llc/llc.h> +#include <vnet/snap/snap.h> +#include <vnet/bonding/node.h> + +bond_main_t bond_main; + +#define foreach_bond_input_error \ + _(NONE, "no error") \ + _(IF_DOWN, "interface down") \ + _(NO_SLAVE, "no slave") \ + _(NO_BOND, "no bond interface")\ + _(PASS_THRU, "pass through") + +typedef enum +{ +#define _(f,s) BOND_INPUT_ERROR_##f, + foreach_bond_input_error +#undef _ + BOND_INPUT_N_ERROR, +} bond_input_error_t; + +static char *bond_input_error_strings[] = { +#define _(n,s) s, + foreach_bond_input_error +#undef _ +}; + +static u8 * +format_bond_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *); + vnet_hw_interface_t *hw, *hw1; + vnet_main_t *vnm = vnet_get_main (); + + hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index); + hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index); + s = format (s, "src %U, dst %U, %s -> %s", + format_ethernet_address, t->ethernet.src_address, + format_ethernet_address, t->ethernet.dst_address, + hw->name, hw1->name); + + return s; +} + +static_always_inline u8 +packet_is_cdp (ethernet_header_t * eth) +{ + llc_header_t *llc; + snap_header_t *snap; + + llc = (llc_header_t *) (eth + 1); + snap = (snap_header_t *) (llc + 1); + + return ((eth->type == htons (ETHERNET_TYPE_CDP)) || + ((llc->src_sap == 0xAA) && (llc->control == 0x03) && + (snap->protocol == htons (0x2000)) && + (snap->oui[0] == 0) && (snap->oui[1] == 0) && + (snap->oui[2] == 0x0C))); +} + +static inline void +bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, + slave_if_t * sif, ethernet_header_t * eth, + vlib_buffer_t * b0) +{ + bond_if_t *bif; + u16 thread_index = vlib_get_thread_index (); + u16 *ethertype_p, ethertype; 
+ ethernet_vlan_header_t *vlan; + + if (PREDICT_TRUE (sif != 0)) + { + bif = bond_get_master_by_sw_if_index (sif->group); + if (PREDICT_TRUE (bif != 0)) + { + if (PREDICT_TRUE (vec_len (bif->slaves) >= 1)) + { + if (PREDICT_TRUE (bif->admin_up == 1)) + { + if (!ethernet_frame_is_tagged (ntohs (eth->type))) + { + // Let some layer2 packets pass through. + if (PREDICT_TRUE ((eth->type != + htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) + && !packet_is_cdp (eth) + && (eth->type != + htons + (ETHERNET_TYPE_802_1_LLDP)))) + { + // Change the physical interface to + // bond interface + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + bif->sw_if_index; + + /* increase rx counters */ + vlib_increment_simple_counter + (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + bif->sw_if_index, 1); + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_PASS_THRU, 1); + } + } + else + { + vlan = (void *) (eth + 1); + ethertype_p = &vlan->type; + if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN)) + { + vlan++; + ethertype_p = &vlan->type; + } + ethertype = *ethertype_p; + if (PREDICT_TRUE ((ethertype != + htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) + && (ethertype != + htons (ETHERNET_TYPE_CDP)) + && (ethertype != + htons + (ETHERNET_TYPE_802_1_LLDP)))) + { + // Change the physical interface to + // bond interface + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + bif->sw_if_index; + + /* increase rx counters */ + vlib_increment_simple_counter + (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + bif->sw_if_index, 1); + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_PASS_THRU, 1); + } + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_IF_DOWN, 1); + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_NO_SLAVE, 1); + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_NO_BOND, 1); + } + } + else + { + 
vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1); + } + +} + +static uword +bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next_index; + u32 *from, *to_next, n_left_from, n_left_to_next; + ethernet_header_t *eth, *eth1, *eth2, *eth3; + u32 next0, next1, next2, next3; + bond_packet_trace_t *t0; + uword n_trace = vlib_get_trace_count (vm, node); + u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; + slave_if_t *sif, *sif1, *sif2, *sif3; + u16 thread_index = vlib_get_thread_index (); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 12 && n_left_to_next >= 4) + { + // Prefetch next iteration + { + vlib_buffer_t *b4, *b5, *b6, *b7; + + b4 = vlib_get_buffer (vm, from[4]); + b5 = vlib_get_buffer (vm, from[5]); + b6 = vlib_get_buffer (vm, from[6]); + b7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (b4, STORE); + vlib_prefetch_buffer_header (b5, STORE); + vlib_prefetch_buffer_header (b6, STORE); + vlib_prefetch_buffer_header (b7, STORE); + + CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next0 = 0; + next1 = 0; + next2 = 0; + next3 = 0; + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + + from += 4; 
+ to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0, + b0); + vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1, + b1); + vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2, + b2); + vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3, + b3); + + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1); + eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2); + eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX]; + + // sw_if_index points to the physical interface + sif = bond_get_slave_by_sw_if_index (sw_if_index); + sif1 = bond_get_slave_by_sw_if_index (sw_if_index1); + sif2 = bond_get_slave_by_sw_if_index (sw_if_index2); + sif3 = bond_get_slave_by_sw_if_index (sw_if_index3); + + bond_sw_if_index_rewrite (vm, node, sif, eth, b0); + bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1); + bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2); + bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3); + + if (PREDICT_FALSE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index; + t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next1, b1, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b1, sizeof (*t0)); 
+ t0->ethernet = *eth1; + t0->sw_if_index = sw_if_index1; + t0->bond_sw_if_index = + vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + /* Each traced buffer must be paired with its own next index. */ + vlib_trace_buffer (vm, node, next2, b2, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b2, sizeof (*t0)); + t0->ethernet = *eth2; + t0->sw_if_index = sw_if_index2; + t0->bond_sw_if_index = + vnet_buffer (b2)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + /* Trace b3 itself (not b2 again) so the record added below belongs to a traced buffer. */ + vlib_trace_buffer (vm, node, next3, b3, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b3, sizeof (*t0)); + t0->ethernet = *eth3; + t0->sw_if_index = sw_if_index3; + t0->bond_sw_if_index = + vnet_buffer (b3)->sw_if_index[VLIB_RX]; + } + } + } + } + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, next0, next1, + next2, next3); + } + + /* Single-buffer loop: handles whatever the 4-wide loop above left over. */ + while (n_left_from > 0 && n_left_to_next > 0) + { + // Prefetch next iteration + if (n_left_from > 1) + { + vlib_buffer_t *p2; + + p2 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next0 = 0; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0, + b0); + + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + // sw_if_index points to the physical interface + sif = bond_get_slave_by_sw_if_index (sw_if_index); + bond_sw_if_index_rewrite (vm, node, sif, eth, b0); + + 
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, bond_input_node.index, + BOND_INPUT_ERROR_NONE, frame->n_vectors); + + vnet_device_increment_rx_packets (thread_index, frame->n_vectors); + + return frame->n_vectors; +} + +static clib_error_t * +bond_input_init (vlib_main_t * vm) +{ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bond_input_node) = { + .function = bond_input_fn, + .name = "bond-input", + .vector_size = sizeof (u32), + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_bond_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = BOND_INPUT_N_ERROR, + .error_strings = bond_input_error_strings, + .n_next_nodes = 0, + .next_nodes = + { + [0] = "error-drop" + } +}; + +VLIB_INIT_FUNCTION (bond_input_init); + +VNET_FEATURE_INIT (bond_input, static) = +{ + .arc_name = "device-input", + .node_name = "bond-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; +VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn) +/* *INDENT-ON* */ + +static clib_error_t * +bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif; + vlib_main_t *vm = bm->vlib_main; + + sif = bond_get_slave_by_sw_if_index (sw_if_index); + if (sif) + { + sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + if (sif->port_enabled == 0) + { + if (sif->lacp_enabled == 0) + { + bond_disable_collecting_distributing (vm, sif); + } + } + else + { + if (sif->lacp_enabled == 0) + { + bond_enable_collecting_distributing (vm, sif); + } + } + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down); + +static clib_error_t * +bond_hw_interface_up_down (vnet_main_t * vnm, u32 
hw_if_index, u32 flags) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif; + vnet_sw_interface_t *sw; + vlib_main_t *vm = bm->vlib_main; + + /* sw_interfaces is indexed by sw_if_index, so resolve hw_if_index first */ + sw = vnet_get_hw_sw_interface (vnm, hw_if_index); + sif = bond_get_slave_by_sw_if_index (sw->sw_if_index); + if (sif) + { + if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + if (sif->lacp_enabled == 0) + { + bond_disable_collecting_distributing (vm, sif); + } + } + else + { + if (sif->lacp_enabled == 0) + { + bond_enable_collecting_distributing (vm, sif); + } + } + } + + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h new file mode 100644 index 00000000000..74f3b1a356a --- /dev/null +++ b/src/vnet/bonding/node.h @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_vnet_bonding_node_h__ +#define __included_vnet_bonding_node_h__ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/format.h> +#include <vppinfra/hash.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/interface.h> + +#define LACP_FAST_PERIODIC_TIMER 1.0 +#define LACP_SHORT_TIMOUT_TIME (LACP_FAST_PERIODIC_TIMER * 3) +#define LACP_SLOW_PERIODIC_TIMER 30.0 +#define LACP_LONG_TIMOUT_TIME (LACP_SLOW_PERIODIC_TIMER * 3) + +#ifndef MIN +#define MIN(x,y) (((x)<(y))?(x):(y)) +#endif + +#define foreach_bond_mode \ + _ (1, ROUND_ROBIN, "round-robin") \ + _ (2, ACTIVE_BACKUP, "active-backup") \ + _ (3, XOR, "xor") \ + _ (4, BROADCAST, "broadcast") \ + _ (5, LACP, "lacp") + +typedef enum +{ +#define _(v, f, s) BOND_MODE_##f = v, + foreach_bond_mode +#undef _ +} bond_mode_t; + +/* configurable load-balances */ +#define foreach_bond_lb \ + _ (2, L23, "l23", l23) \ + _ (1, l34 , "l34", l34) \ + _ (0, L2, "l2", l2) + +/* load-balance functions implemented in bond-output */ +#define foreach_bond_lb_algo \ + _ (0, L2, "l2", l2) \ + _ (1, l34 , "l34", l34) \ + _ (2, L23, "l23", l23) \ + _ (3, RR, "round-robin", round_robin) \ + _ (4, BC, "broadcast", broadcast) \ + _ (5, AB, "active-backup", active_backup) + +typedef enum +{ +#define _(v, f, s, p) BOND_LB_##f = v, + foreach_bond_lb_algo +#undef _ +} bond_load_balance_t; + +typedef struct +{ + u8 hw_addr_set; + u8 hw_addr[6]; + u8 mode; + u8 lb; + /* return */ + u32 sw_if_index; + int rv; + clib_error_t *error; +} bond_create_if_args_t; + +typedef struct +{ + /* slave's sw_if_index */ + u32 slave; + /* bond's sw_if_index */ + u32 group; + u8 is_passive; + u8 is_long_timeout; + /* return */ + int rv; + clib_error_t *error; +} bond_enslave_args_t; + +typedef struct +{ + u32 slave; + /* return */ + int rv; + clib_error_t *error; +} bond_detach_slave_args_t; + +/** BOND interface details struct */ +typedef struct +{ + u32 sw_if_index; + u8 interface_name[64]; + u8 mode; + u8 lb; + u32 
active_slaves; + u32 slaves; +} bond_interface_details_t; + +/** slave interface details struct */ +typedef struct +{ + u32 sw_if_index; + u8 interface_name[64]; + u8 is_passive; + u8 is_long_timeout; + u32 active_slaves; +} slave_interface_details_t; + +typedef CLIB_PACKED (struct + { + u16 system_priority; + u8 system[6]; + u16 key; u16 port_priority; u16 port_number; + u8 state; + }) lacp_port_info_t; + +typedef struct +{ + u8 admin_up; + u8 mode; + u8 lb; + + /* the last slave index for the rr lb */ + u32 lb_rr_last_index; + + u32 dev_instance; + u32 hw_if_index; + u32 sw_if_index; + + /* Configured slaves */ + u32 *slaves; + + /* Slaves that are in DISTRIBUTING state */ + u32 *active_slaves; + + /* rapidly find an active slave */ + uword *active_slave_by_sw_if_index; + + lacp_port_info_t partner; + lacp_port_info_t actor; + u8 individual_aggregator; + + u32 group; + uword *port_number_bitmap; + u8 use_custom_mac; + u8 hw_address[6]; +} bond_if_t; + +typedef struct +{ + u8 persistent_hw_address[6]; + + /* neighbor's vlib software interface index */ + u32 sw_if_index; + + /* Neighbor time-to-live (usually 3s) */ + f32 ttl_in_seconds; + + /* 1 = interface is configured with long timeout (90s) */ + u8 is_long_timeout; + + /* 1 = debug is on; 0 = debug is off */ + u8 debug; + + /* tx packet template id for this neighbor */ + u8 packet_template_index; + + /* Info we actually keep about each neighbor */ + + /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */ + u8 last_packet_signature_valid; + uword last_packet_signature; + + /* last received lacp packet, for the J-hash optimization */ + u8 *last_rx_pkt; + + /* last marker packet */ + u8 *last_marker_pkt; + + /* neighbor vlib hw_if_index */ + u32 hw_if_index; + + /* actor does not initiate the protocol exchange */ + u8 is_passive; + + /* Partner port information */ + lacp_port_info_t partner; + lacp_port_info_t partner_admin; + + /* Actor port information */ + lacp_port_info_t actor; + 
lacp_port_info_t actor_admin; + + /* Need To Transmit flag */ + u8 ntt; + + /* Link has been established and Aggregate Port is operable */ + u8 port_enabled; + + /* Initialization or reinitialization of the lacp protocol entity */ + u8 begin; + + /* Aggregation Port is operating the lacp */ + u8 lacp_enabled; + + /* MUX to indicate to the Selection Logic wait_while_timer expired */ + u8 ready_n; + + /* Selection Logic indicates all Aggregation Ports attached */ + u8 ready; + + /* Selection Logic selected an Aggregator */ + int selected; + + /* RX machine indicates an Aggregation Port in PORT_DISABLED state */ + u8 port_moved; + + /* timer used to detect whether received protocol information has expired */ + f64 current_while_timer; + + /* timer used to detect actor churn states */ + f64 actor_churn_timer; + + /* time last lacpdu was sent */ + f64 last_lacpdu_time; + + /* timer used to generate periodic transmission */ + f64 periodic_timer; + + /* timer used to detect partner churn states */ + f64 partner_churn_timer; + + /* provides hysteresis before performing an aggregation change */ + f64 wait_while_timer; + + /* Implementation variables, not in the spec */ + int rx_state; + int tx_state; + int mux_state; + int ptx_state; + + /* actor admin key */ + u32 group; + + u32 marker_tx_id; + + u32 bif_dev_instance; + + u8 loopback_port; + + /* bond mode */ + u8 mode; + + clib_spinlock_t lockp; +} slave_if_t; + +typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif, + slave_if_t * sif, u8 enable); + +typedef struct +{ + /* pool of bonding interfaces */ + bond_if_t *interfaces; + + /* pool of lacp neighbors */ + slave_if_t *neighbors; + + /* rapidly find a neighbor by vlib software interface index */ + uword *neighbor_by_sw_if_index; + + /* rapidly find a bond by vlib software interface index */ + uword *bond_by_sw_if_index; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* lacp plugin is loaded */ + u8 
lacp_plugin_loaded; + + lacp_enable_disable_func lacp_enable_disable; +} bond_main_t; + +/* bond packet trace capture */ +typedef struct +{ + ethernet_header_t ethernet; + u32 sw_if_index; + u32 bond_sw_if_index; +} bond_packet_trace_t; + +typedef u32 (*load_balance_func) (vlib_main_t * vm, + vlib_node_runtime_t * node, bond_if_t * bif, + vlib_buffer_t * b0); + +typedef struct +{ + load_balance_func load_balance; +} bond_load_balance_func_t; + +extern vlib_node_registration_t bond_input_node; +extern vnet_device_class_t bond_dev_class; +extern bond_main_t bond_main; + +void bond_disable_collecting_distributing (vlib_main_t * vm, + slave_if_t * sif); +void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif); +u8 *format_bond_interface_name (u8 * s, va_list * args); + +void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args); +int bond_delete_if (vlib_main_t * vm, u32 sw_if_index); +void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args); +void bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args); +int bond_dump_ifs (bond_interface_details_t ** out_bondids); +int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveids, + u32 bond_sw_if_index); + +static inline uword +unformat_bond_mode (unformat_input_t * input, va_list * args) +{ + u8 *r = va_arg (*args, u8 *); + + if (0); +#define _(v, f, s) else if (unformat (input, s)) *r = BOND_MODE_##f; + foreach_bond_mode +#undef _ + else + return 0; + + return 1; +} + +static inline u8 * +format_bond_mode (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(v, f, s) case BOND_MODE_##f: t = (u8 *) s; break; + foreach_bond_mode +#undef _ + default: + return format (s, "unknown"); + } + return format (s, "%s", t); +} + +static inline uword +unformat_bond_load_balance (unformat_input_t * input, va_list * args) +{ + u8 *r = va_arg (*args, u8 *); + + if (0); +#define _(v, f, s, p) else if (unformat (input, s)) *r = 
BOND_LB_##f; + foreach_bond_lb +#undef _ + else + return 0; + + return 1; +} + +static inline u8 * +format_bond_load_balance (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(v, f, s, p) case BOND_LB_##f: t = (u8 *) s; break; + foreach_bond_lb_algo +#undef _ + default: + return format (s, "unknown"); + } + return format (s, "%s", t); +} + +static inline void +bond_register_callback (lacp_enable_disable_func func) +{ + bond_main_t *bm = &bond_main; + + bm->lacp_plugin_loaded = 1; + bm->lacp_enable_disable = func; +} + +static inline bond_if_t * +bond_get_master_by_sw_if_index (u32 sw_if_index) +{ + bond_main_t *bm = &bond_main; + uword *p; + + p = hash_get (bm->bond_by_sw_if_index, sw_if_index); + if (!p) + { + return 0; + } + return pool_elt_at_index (bm->interfaces, p[0]); +} + +static inline bond_if_t * +bond_get_master_by_dev_instance (u32 dev_instance) +{ + bond_main_t *bm = &bond_main; + + return pool_elt_at_index (bm->interfaces, dev_instance); +} + +static inline slave_if_t * +bond_get_slave_by_sw_if_index (u32 sw_if_index) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif = 0; + uword *p; + + p = hash_get (bm->neighbor_by_sw_if_index, sw_if_index); + if (p) + { + sif = pool_elt_at_index (bm->neighbors, p[0]); + } + return sif; +} + +#endif /* __included_vnet_bonding_node_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index fea92e7fd27..5c7c4869c91 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -29,6 +29,7 @@ #include <vlibmemory/vl_memory_api_h.h> #endif /* included_from_layer_3 */ +#include <vnet/bonding/bond.api.h> #include <vnet/devices/af_packet/af_packet.api.h> #include <vnet/devices/netmap/netmap.api.h> #include <vnet/devices/virtio/vhost_user.api.h> diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 
28f16d2fe67..ded6e7de337 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -47,6 +47,8 @@ #include <vpp/api/vpe_msg_enum.h> +#include <vnet/bonding/node.h> + #define vl_typedefs /* define message structures */ #include <vpp/api/vpe_all_api_h.h> #undef vl_typedefs @@ -609,6 +611,84 @@ static void *vl_api_sw_interface_tap_v2_dump_t_print FINISH; } +static void *vl_api_bond_create_t_print + (vl_api_bond_create_t * mp, void *handle) +{ + u8 *s; + u8 null_mac[6]; + + memset (null_mac, 0, sizeof (null_mac)); + + s = format (0, "SCRIPT: bond_create "); + if (memcmp (mp->mac_address, null_mac, 6)) + s = format (s, "mac-address %U ", + format_ethernet_address, mp->mac_address); + if (mp->mode) + s = format (s, "mode %U ", format_bond_mode, mp->mode); + if (mp->lb) + s = format (s, "lb %U ", format_bond_load_balance, mp->lb); + FINISH; +} + +static void *vl_api_bond_delete_t_print + (vl_api_bond_delete_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bond_delete "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_bond_enslave_t_print + (vl_api_bond_enslave_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bond_enslave "); + s = format (s, "bond_sw_if_index %u ", ntohl (mp->bond_sw_if_index)); + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + if (mp->is_passive) + s = format (s, "passive "); + if (mp->is_long_timeout) + s = format (s, "long-timeout "); + + FINISH; +} + +static void *vl_api_bond_detach_slave_t_print + (vl_api_bond_detach_slave_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bond_detach_slave "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_sw_interface_bond_dump_t_print + (vl_api_sw_interface_bond_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_bond_dump "); + + FINISH; +} + +static void *vl_api_sw_interface_slave_dump_t_print + (vl_api_sw_interface_slave_dump_t * 
mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_slave_dump "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + static void *vl_api_ip_add_del_route_t_print (vl_api_ip_add_del_route_t * mp, void *handle) { @@ -3357,6 +3437,10 @@ _(TAP_CONNECT, tap_connect) \ _(TAP_MODIFY, tap_modify) \ _(TAP_DELETE, tap_delete) \ _(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) \ +_(BOND_CREATE, bond_create) \ +_(BOND_DELETE, bond_delete) \ +_(BOND_ENSLAVE, bond_enslave) \ +_(BOND_DETACH_SLAVE, bond_detach_slave) \ _(TAP_CREATE_V2, tap_create_v2) \ _(TAP_DELETE_V2, tap_delete_v2) \ _(SW_INTERFACE_TAP_V2_DUMP, sw_interface_tap_v2_dump) \ |