diff options
author | Steven <sluong@cisco.com> | 2017-12-20 12:43:01 -0800 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2018-03-21 21:02:15 +0000 |
commit | 9cd2d7a5a4fafadb65d772c48109d55d1e19d425 (patch) | |
tree | 4a9e0665be0096ee6bfc2235388f90b276b23814 /src/vnet/bonding/node.c | |
parent | 43ebe29b6ea1107c30311cfb3dbd8190282903d0 (diff) |
bond: Add bonding driver and LACP protocol
Add a bonding driver to support creation of a bond interface composed of
multiple slave interfaces. The slave interfaces could be physical interfaces,
or just any virtual interfaces. For example, memif interfaces.
The syntax to create a bond interface is
create bond mode <lacp | xor | active-backup | broadcast | round-robin>
To enslave an interface to the bond interface,
enslave interface TenGigabitEthernet6/0/0 to BondEthernet0
Please see src/plugins/lacp/lacp_doc.md for more examples and additional
options.
LACP is a control plane protocol which manages and monitors the status of
the slave interfaces. The protocol is part of 802.3ad standard. This patch
implements LACPv1. LACPv2 is not supported.
To enable LACP on the bond interface, specify "mode lacp" when the bond
interface is created. The syntax to enslave a slave interface is the same as
other bonding modes.
Change-Id: I06581d3b87635972f9f0e1ec50b67560fc13e26c
Signed-off-by: Steven <sluong@cisco.com>
Diffstat (limited to 'src/vnet/bonding/node.c')
-rw-r--r-- | src/vnet/bonding/node.c | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c new file mode 100644 index 00000000000..4deec829195 --- /dev/null +++ b/src/vnet/bonding/node.c @@ -0,0 +1,509 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <vnet/llc/llc.h> +#include <vnet/snap/snap.h> +#include <vnet/bonding/node.h> + +bond_main_t bond_main; + +#define foreach_bond_input_error \ + _(NONE, "no error") \ + _(IF_DOWN, "interface down") \ + _(NO_SLAVE, "no slave") \ + _(NO_BOND, "no bond interface")\ + _(PASS_THRU, "pass through") + +typedef enum +{ +#define _(f,s) BOND_INPUT_ERROR_##f, + foreach_bond_input_error +#undef _ + BOND_INPUT_N_ERROR, +} bond_input_error_t; + +static char *bond_input_error_strings[] = { +#define _(n,s) s, + foreach_bond_input_error +#undef _ +}; + +static u8 * +format_bond_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *); + vnet_hw_interface_t *hw, *hw1; + vnet_main_t *vnm = vnet_get_main (); + + hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index); + hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index); + s = 
format (s, "src %U, dst %U, %s -> %s", + format_ethernet_address, t->ethernet.src_address, + format_ethernet_address, t->ethernet.dst_address, + hw->name, hw1->name); + + return s; +} + +static_always_inline u8 +packet_is_cdp (ethernet_header_t * eth) +{ + llc_header_t *llc; + snap_header_t *snap; + + llc = (llc_header_t *) (eth + 1); + snap = (snap_header_t *) (llc + 1); + + return ((eth->type == htons (ETHERNET_TYPE_CDP)) || + ((llc->src_sap == 0xAA) && (llc->control == 0x03) && + (snap->protocol == htons (0x2000)) && + (snap->oui[0] == 0) && (snap->oui[1] == 0) && + (snap->oui[2] == 0x0C))); +} + +static inline void +bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, + slave_if_t * sif, ethernet_header_t * eth, + vlib_buffer_t * b0) +{ + bond_if_t *bif; + u16 thread_index = vlib_get_thread_index (); + u16 *ethertype_p, ethertype; + ethernet_vlan_header_t *vlan; + + if (PREDICT_TRUE (sif != 0)) + { + bif = bond_get_master_by_sw_if_index (sif->group); + if (PREDICT_TRUE (bif != 0)) + { + if (PREDICT_TRUE (vec_len (bif->slaves) >= 1)) + { + if (PREDICT_TRUE (bif->admin_up == 1)) + { + if (!ethernet_frame_is_tagged (ntohs (eth->type))) + { + // Let some layer2 packets pass through. 
+ if (PREDICT_TRUE ((eth->type != + htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) + && !packet_is_cdp (eth) + && (eth->type != + htons + (ETHERNET_TYPE_802_1_LLDP)))) + { + // Change the physical interface to + // bond interface + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + bif->sw_if_index; + + /* increase rx counters */ + vlib_increment_simple_counter + (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + bif->sw_if_index, 1); + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_PASS_THRU, 1); + } + } + else + { + vlan = (void *) (eth + 1); + ethertype_p = &vlan->type; + if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN)) + { + vlan++; + ethertype_p = &vlan->type; + } + ethertype = *ethertype_p; + if (PREDICT_TRUE ((ethertype != + htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) + && (ethertype != + htons (ETHERNET_TYPE_CDP)) + && (ethertype != + htons + (ETHERNET_TYPE_802_1_LLDP)))) + { + // Change the physical interface to + // bond interface + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + bif->sw_if_index; + + /* increase rx counters */ + vlib_increment_simple_counter + (vnet_main.interface_main.sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + bif->sw_if_index, 1); + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_PASS_THRU, 1); + } + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_IF_DOWN, 1); + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_NO_SLAVE, 1); + } + } + else + { + vlib_error_count (vm, node->node_index, + BOND_INPUT_ERROR_NO_BOND, 1); + } + } + else + { + vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1); + } + +} + +static uword +bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next_index; + u32 *from, *to_next, n_left_from, n_left_to_next; + ethernet_header_t *eth, *eth1, *eth2, *eth3; + u32 
next0, next1, next2, next3; + bond_packet_trace_t *t0; + uword n_trace = vlib_get_trace_count (vm, node); + u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; + slave_if_t *sif, *sif1, *sif2, *sif3; + u16 thread_index = vlib_get_thread_index (); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 12 && n_left_to_next >= 4) + { + // Prefetch next iteration + { + vlib_buffer_t *b4, *b5, *b6, *b7; + + b4 = vlib_get_buffer (vm, from[4]); + b5 = vlib_get_buffer (vm, from[5]); + b6 = vlib_get_buffer (vm, from[6]); + b7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (b4, STORE); + vlib_prefetch_buffer_header (b5, STORE); + vlib_prefetch_buffer_header (b6, STORE); + vlib_prefetch_buffer_header (b7, STORE); + + CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next0 = 0; + next1 = 0; + next2 = 0; + next3 = 0; + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0, + b0); + vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1, + b1); + 
vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2, + b2); + vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3, + b3); + + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1); + eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2); + eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX]; + + // sw_if_index points to the physical interface + sif = bond_get_slave_by_sw_if_index (sw_if_index); + sif1 = bond_get_slave_by_sw_if_index (sw_if_index1); + sif2 = bond_get_slave_by_sw_if_index (sw_if_index2); + sif3 = bond_get_slave_by_sw_if_index (sw_if_index3); + + bond_sw_if_index_rewrite (vm, node, sif, eth, b0); + bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1); + bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2); + bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3); + + if (PREDICT_FALSE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->ethernet = *eth; + t0->sw_if_index = sw_if_index; + t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next1, b1, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b1, sizeof (*t0)); + t0->ethernet = *eth1; + t0->sw_if_index = sw_if_index1; + t0->bond_sw_if_index = + vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next1, b2, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b2, sizeof (*t0)); + t0->ethernet = 
*eth2; + t0->sw_if_index = sw_if_index2; + t0->bond_sw_if_index = + vnet_buffer (b2)->sw_if_index[VLIB_RX]; + + if (PREDICT_TRUE (n_trace > 0)) + { + vlib_trace_buffer (vm, node, next1, b2, + 0 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b3, sizeof (*t0)); + t0->ethernet = *eth3; + t0->sw_if_index = sw_if_index3; + t0->bond_sw_if_index = + vnet_buffer (b3)->sw_if_index[VLIB_RX]; + } + } + } + } + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, next0, next1, + next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + // Prefetch next iteration + if (n_left_from > 1) + { + vlib_buffer_t *p2; + + p2 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next0 = 0; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0, + b0); + + eth = (ethernet_header_t *) vlib_buffer_get_current (b0); + + sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + // sw_if_index points to the physical interface + sif = bond_get_slave_by_sw_if_index (sw_if_index); + bond_sw_if_index_rewrite (vm, node, sif, eth, b0); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, bond_input_node.index, + BOND_INPUT_ERROR_NONE, 
frame->n_vectors); + + vnet_device_increment_rx_packets (thread_index, frame->n_vectors); + + return frame->n_vectors; +} + +static clib_error_t * +bond_input_init (vlib_main_t * vm) +{ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bond_input_node) = { + .function = bond_input_fn, + .name = "bond-input", + .vector_size = sizeof (u32), + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_bond_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = BOND_INPUT_N_ERROR, + .error_strings = bond_input_error_strings, + .n_next_nodes = 0, + .next_nodes = + { + [0] = "error-drop" + } +}; + +VLIB_INIT_FUNCTION (bond_input_init); + +VNET_FEATURE_INIT (bond_input, static) = +{ + .arc_name = "device-input", + .node_name = "bond-input", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; +VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn) +/* *INDENT-ON* */ + +static clib_error_t * +bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif; + vlib_main_t *vm = bm->vlib_main; + + sif = bond_get_slave_by_sw_if_index (sw_if_index); + if (sif) + { + sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + if (sif->port_enabled == 0) + { + if (sif->lacp_enabled == 0) + { + bond_disable_collecting_distributing (vm, sif); + } + } + else + { + if (sif->lacp_enabled == 0) + { + bond_enable_collecting_distributing (vm, sif); + } + } + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down); + +static clib_error_t * +bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + bond_main_t *bm = &bond_main; + slave_if_t *sif; + vnet_sw_interface_t *sw; + vlib_main_t *vm = bm->vlib_main; + vnet_interface_main_t *im = &vnm->interface_main; + + sw = pool_elt_at_index (im->sw_interfaces, hw_if_index); + sif = bond_get_slave_by_sw_if_index (sw->sw_if_index); + if (sif) + { + if (!(flags & 
VNET_HW_INTERFACE_FLAG_LINK_UP)) + { + if (sif->lacp_enabled == 0) + { + bond_disable_collecting_distributing (vm, sif); + } + } + else + { + if (sif->lacp_enabled == 0) + { + bond_enable_collecting_distributing (vm, sif); + } + } + } + + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |