aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/bonding/bond.api163
-rw-r--r--src/vnet/bonding/bond_api.c328
-rw-r--r--src/vnet/bonding/cli.c706
-rw-r--r--src/vnet/bonding/device.c610
-rw-r--r--src/vnet/bonding/node.c509
-rw-r--r--src/vnet/bonding/node.h451
-rw-r--r--src/vnet/vnet_all_api_h.h1
7 files changed, 2768 insertions, 0 deletions
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api
new file mode 100644
index 00000000000..e8919e14904
--- /dev/null
+++ b/src/vnet/bonding/bond.api
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpe control-plane API messages for
+ the bonding device driver
+*/
+
+option version = "1.0.0";
+
+/** \brief Initialize a new bond interface with the given parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param use_custom_mac - if set, mac_address is valid
+ @param mac_address - mac addr to assign to the interface if use_custom_mac is set
+ @param mode - mode, required (1=round-robin, 2=active-backup, 3=xor, 4=broadcast, 5=lacp)
+ @param lb - load balance, optional (0=l2, 1=l34, 2=l23) valid for xor and lacp modes. Otherwise ignored
+*/
+define bond_create
+{
+ u32 client_index;
+ u32 context;
+ u8 use_custom_mac;
+ u8 mac_address[6];
+ u8 mode;
+ u8 lb;
+};
+
+/** \brief Reply for bond create request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new bond interface
+*/
+define bond_create_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete bond interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of the bond interface to delete
+*/
+autoreply define bond_delete
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Enslave an interface to an existing bond interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - slave sw_if_index
+ @param bond_sw_if_index - bond sw_if_index
+ @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker
+ @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
+*/
+define bond_enslave
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 bond_sw_if_index;
+ u8 is_passive;
+ u8 is_long_timeout;
+};
+
+/** \brief Reply for bond enslave request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define bond_enslave_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief bond detach slave
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of slave interface
+*/
+autoreply define bond_detach_slave
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump bond interfaces request */
+define sw_interface_bond_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for bond dump request
+ @param sw_if_index - software index of bond interface
+ @param interface_name - name of interface
+ @param mode - bonding mode
+ @param lb - load balance algo
+ @param active_slaves - active slaves count
+ @param slaves - config slave count
+*/
+define sw_interface_bond_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u8 mode;
+ u8 lb;
+ u32 active_slaves;
+ u32 slaves;
+};
+
+/** \brief bond slave dump
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of bond interface
+*/
+define sw_interface_slave_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Reply for slave dump request
+ @param sw_if_index - software index of slave interface
+ @param interface_name - name of interface
+ @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker
+ @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
+*/
+define sw_interface_slave_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u8 is_passive;
+ u8 is_long_timeout;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
new file mode 100644
index 00000000000..02536e966a1
--- /dev/null
+++ b/src/vnet/bonding/bond_api.c
@@ -0,0 +1,328 @@
+/*
+ *------------------------------------------------------------------
+ * bond_api.c - vnet bonding device driver API support
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/bonding/node.h>
+
+/*
+ * (message id, handler name stem) pairs for every bonding API request.
+ * bond_api_hookup () expands this list to register one handler per entry.
+ */
+#define foreach_bond_api_msg \
+_(BOND_CREATE, bond_create) \
+_(BOND_DELETE, bond_delete) \
+_(BOND_ENSLAVE, bond_enslave) \
+_(BOND_DETACH_SLAVE, bond_detach_slave) \
+_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\
+_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump)
+
+/*
+ * Tell one client that sw_if_index has been deleted by posting a
+ * sw_interface_event with the deleted flag set on its input queue q.
+ * The am argument is not used.
+ */
+static void
+bond_send_sw_interface_event_deleted (vpe_api_main_t * am,
+                                      unix_shared_memory_queue_t * q,
+                                      u32 sw_if_index)
+{
+  vl_api_sw_interface_event_t *event = vl_msg_api_alloc (sizeof (*event));
+
+  /* Zeroing the message first covers every field not set below. */
+  memset (event, 0, sizeof (*event));
+
+  event->deleted = 1;
+  event->link_up_down = 0;
+  event->admin_up_down = 0;
+  event->sw_if_index = ntohl (sw_if_index);
+  event->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+
+  vl_msg_api_send_shmem (q, (u8 *) & event);
+}
+
+/*
+ * API handler for bond_delete: delete the bond interface, send the reply
+ * and, when the delete succeeded, follow it with an interface-deleted
+ * event on the same client queue.
+ */
+static void
+vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp)
+{
+  u32 bond_sw_if_index = ntohl (mp->sw_if_index);
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_delete_reply_t *reply;
+  int status;
+
+  /* The delete is attempted before checking that the client still exists. */
+  status = bond_delete_if (vlib_get_main (), bond_sw_if_index);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->retval = ntohl (status);
+  reply->context = mp->context;
+  reply->_vl_msg_id = ntohs (VL_API_BOND_DELETE_REPLY);
+
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+
+  if (status == 0)
+    bond_send_sw_interface_event_deleted (&vpe_api_main, client_q,
+                                          bond_sw_if_index);
+}
+
+/*
+ * API handler for bond_create: create a bond interface from the request
+ * and always answer the client, whether creation succeeded or failed.
+ *
+ * The reply carries the return code from bond_create_if () and the new
+ * interface's sw_if_index (~0 when creation failed).
+ */
+static void
+vl_api_bond_create_t_handler (vl_api_bond_create_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_bond_create_reply_t *rmp;
+  unix_shared_memory_queue_t *q;
+  bond_create_if_args_t _a, *ap = &_a;
+
+  memset (ap, 0, sizeof (*ap));
+  /* bond_create_if () only fills this in on success; 0 is a valid index */
+  ap->sw_if_index = ~0;
+
+  if (mp->use_custom_mac)
+    {
+      clib_memcpy (ap->hw_addr, mp->mac_address, 6);
+      ap->hw_addr_set = 1;
+    }
+
+  ap->mode = mp->mode;
+  ap->lb = mp->lb;
+  bond_create_if (vm, ap);
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!q)
+    return;
+
+  /*
+   * Reply even when ap->rv is non-zero: the client blocks on the reply
+   * and needs retval to learn why the create failed.
+   */
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_BOND_CREATE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = ntohl (ap->rv);
+  rmp->sw_if_index = ntohl (ap->sw_if_index);
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * API handler for bond_enslave: attach the requested slave interface to
+ * the requested bond and return bond_enslave ()'s result to the client.
+ */
+static void
+vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp)
+{
+  bond_enslave_args_t enslave_args;
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_enslave_reply_t *reply;
+
+  memset (&enslave_args, 0, sizeof (enslave_args));
+  enslave_args.is_long_timeout = mp->is_long_timeout;
+  enslave_args.is_passive = mp->is_passive;
+  enslave_args.slave = ntohl (mp->sw_if_index);
+  enslave_args.group = ntohl (mp->bond_sw_if_index);
+
+  bond_enslave (vlib_get_main (), &enslave_args);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->retval = ntohl (enslave_args.rv);
+  reply->context = mp->context;
+  reply->_vl_msg_id = ntohs (VL_API_BOND_ENSLAVE_REPLY);
+
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+}
+
+/*
+ * API handler for bond_detach_slave: detach the given slave interface
+ * from its bond and return bond_detach_slave ()'s result to the client.
+ */
+static void
+vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
+{
+  bond_detach_slave_args_t detach_args;
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_detach_slave_reply_t *reply;
+
+  memset (&detach_args, 0, sizeof (detach_args));
+  detach_args.slave = ntohl (mp->sw_if_index);
+
+  bond_detach_slave (vlib_get_main (), &detach_args);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->retval = htonl (detach_args.rv);
+  reply->context = mp->context;
+  reply->_vl_msg_id = ntohs (VL_API_BOND_DETACH_SLAVE_REPLY);
+
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+}
+
+/*
+ * Build one sw_interface_bond_details message from bond_if and send it
+ * to the client registration reg.  context is copied through unchanged;
+ * the am argument is not used.
+ */
+static void
+bond_send_sw_interface_details (vpe_api_main_t * am,
+                                vl_api_registration_t * reg,
+                                bond_interface_details_t * bond_if,
+                                u32 context)
+{
+  vl_api_sw_interface_bond_details_t *details;
+  uword name_len;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = htons (VL_API_SW_INTERFACE_BOND_DETAILS);
+  details->context = context;
+  details->sw_if_index = htonl (bond_if->sw_if_index);
+  details->mode = bond_if->mode;
+  details->lb = bond_if->lb;
+  details->slaves = htonl (bond_if->slaves);
+  details->active_slaves = htonl (bond_if->active_slaves);
+
+  /* Copy at most size-1 bytes so the zeroed buffer stays NUL-terminated. */
+  name_len = MIN (ARRAY_LEN (details->interface_name) - 1,
+                  strlen ((const char *) bond_if->interface_name));
+  clib_memcpy (details->interface_name, bond_if->interface_name, name_len);
+
+  vl_api_send_msg (reg, (u8 *) details);
+}
+
+/*
+ * API handler for sw_interface_bond_dump: send one details message per
+ * bond interface to the requesting client.  Nothing is sent when the
+ * client registration is gone or bond_dump_ifs () reports an error.
+ */
+static void
+vl_api_sw_interface_bond_dump_t_handler (vl_api_sw_interface_bond_dump_t * mp)
+{
+  bond_interface_details_t *bond_list = NULL;
+  vl_api_registration_t *client;
+  uword i;
+
+  client = vl_api_client_index_to_registration (mp->client_index);
+  if (client == 0)
+    return;
+
+  if (bond_dump_ifs (&bond_list) != 0)
+    return;
+
+  for (i = 0; i < vec_len (bond_list); i++)
+    bond_send_sw_interface_details (&vpe_api_main, client, &bond_list[i],
+                                    mp->context);
+
+  vec_free (bond_list);
+}
+
+/*
+ * Build one sw_interface_slave_details message from slave_if and send it
+ * to the client registration reg.  context is copied through unchanged;
+ * the am argument is not used.
+ */
+static void
+bond_send_sw_interface_slave_details (vpe_api_main_t * am,
+                                      vl_api_registration_t * reg,
+                                      slave_interface_details_t * slave_if,
+                                      u32 context)
+{
+  vl_api_sw_interface_slave_details_t *details;
+  uword name_len;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = htons (VL_API_SW_INTERFACE_SLAVE_DETAILS);
+  details->context = context;
+  details->sw_if_index = htonl (slave_if->sw_if_index);
+  details->is_long_timeout = slave_if->is_long_timeout;
+  details->is_passive = slave_if->is_passive;
+
+  /* Copy at most size-1 bytes so the zeroed buffer stays NUL-terminated. */
+  name_len = MIN (ARRAY_LEN (details->interface_name) - 1,
+                  strlen ((const char *) slave_if->interface_name));
+  clib_memcpy (details->interface_name, slave_if->interface_name, name_len);
+
+  vl_api_send_msg (reg, (u8 *) details);
+}
+
+/*
+ * API handler for sw_interface_slave_dump: send one details message per
+ * slave of the requested bond interface.  Nothing is sent when the client
+ * registration is gone or bond_dump_slave_ifs () reports an error.
+ */
+static void
+vl_api_sw_interface_slave_dump_t_handler (vl_api_sw_interface_slave_dump_t *
+                                          mp)
+{
+  slave_interface_details_t *slave_list = NULL;
+  vl_api_registration_t *client;
+  uword i;
+
+  client = vl_api_client_index_to_registration (mp->client_index);
+  if (client == 0)
+    return;
+
+  if (bond_dump_slave_ifs (&slave_list, ntohl (mp->sw_if_index)) != 0)
+    return;
+
+  for (i = 0; i < vec_len (slave_list); i++)
+    bond_send_sw_interface_slave_details (&vpe_api_main, client,
+                                          &slave_list[i], mp->context);
+
+  vec_free (slave_list);
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register each bonding message with the API main: for every entry of
+ * foreach_vl_msg_name_crc_bond, add the string "<name>_<crc>" together
+ * with its message id.
+ */
+static void
+bond_setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_bond;
+#undef _
+}
+
+/*
+ * API init function: install the handler, endian and print functions for
+ * every message listed in foreach_bond_api_msg, then fill in the
+ * (msg_name, crc, message-id) table.  Always returns 0.
+ */
+static clib_error_t *
+bond_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+/* Expands to one vl_msg_api_set_handlers () call per bonding message. */
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_bond_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ bond_setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bond_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
new file mode 100644
index 00000000000..b2d66f9f1c8
--- /dev/null
+++ b/src/vnet/bonding/cli.c
@@ -0,0 +1,706 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/bonding/node.h>
+
+/*
+ * Remove slave sif from its bond's active set: drop the first matching
+ * entry from the active_slaves vector and the matching key from the
+ * active_slave_by_sw_if_index hash.  No-op if sif is not active.
+ */
+void
+bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_if_t *bond = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+  int idx;
+
+  for (idx = 0; idx < vec_len (bond->active_slaves); idx++)
+    {
+      uword candidate = *vec_elt_at_index (bond->active_slaves, idx);
+
+      if (candidate != sif->sw_if_index)
+        continue;
+
+      vec_del1 (bond->active_slaves, idx);
+      hash_unset (bond->active_slave_by_sw_if_index, sif->sw_if_index);
+      break;
+    }
+}
+
+/*
+ * Add slave sif to its bond's active set (hash and vector) unless the
+ * hash already has an entry for it.
+ */
+void
+bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_if_t *bond = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+
+  /* Already active: nothing to do. */
+  if (hash_get (bond->active_slave_by_sw_if_index, sif->sw_if_index))
+    return;
+
+  hash_set (bond->active_slave_by_sw_if_index, sif->sw_if_index,
+            sif->sw_if_index);
+  vec_add1 (bond->active_slaves, sif->sw_if_index);
+}
+
+/*
+ * Collect a summary record for every bond interface.
+ *
+ * @param out_bondifs - receives a newly built vector with one
+ *                      bond_interface_details_t per bond (NULL when there
+ *                      are none); the API dump handler vec_free()s it.
+ * @return always 0.
+ */
+int
+bond_dump_ifs (bond_interface_details_t ** out_bondifs)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  bond_if_t *bif;
+  vnet_hw_interface_t *hi;
+  bond_interface_details_t *r_bondifs = NULL;
+  bond_interface_details_t *bondif = NULL;
+
+  /* *INDENT-OFF* */
+  pool_foreach (bif, bm->interfaces,
+  ({
+    vec_add2 (r_bondifs, bondif, 1);
+    memset (bondif, 0, sizeof (*bondif));
+    bondif->sw_if_index = bif->sw_if_index;
+    hi = vnet_get_hw_interface (vnm, bif->hw_if_index);
+    /*
+     * hi->name is a vppinfra vector and is not guaranteed to be
+     * NUL-terminated, so bound the copy with vec_len (), not strlen ().
+     * The record was zeroed above, so the copy stays NUL-terminated.
+     */
+    clib_memcpy (bondif->interface_name, hi->name,
+                 MIN (ARRAY_LEN (bondif->interface_name) - 1,
+                      vec_len (hi->name)));
+    bondif->mode = bif->mode;
+    bondif->lb = bif->lb;
+    bondif->active_slaves = vec_len (bif->active_slaves);
+    bondif->slaves = vec_len (bif->slaves);
+  }));
+  /* *INDENT-ON* */
+
+  *out_bondifs = r_bondifs;
+
+  return 0;
+}
+
+/*
+ * Collect a summary record for every slave of one bond interface.
+ *
+ * @param out_slaveifs - receives a newly built vector with one
+ *                       slave_interface_details_t per entry of the bond's
+ *                       slave list; left untouched on failure.  The API
+ *                       dump handler vec_free()s it.
+ * @param bond_sw_if_index - sw_if_index of the bond interface.
+ * @return 0 on success, 1 if bond_sw_if_index is not a bond interface.
+ */
+int
+bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs,
+                     u32 bond_sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_if_t *bif;
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *sw;
+  slave_interface_details_t *r_slaveifs = NULL;
+  slave_interface_details_t *slaveif = NULL;
+  u32 *sw_if_index = NULL;
+  slave_if_t *sif;
+
+  bif = bond_get_master_by_sw_if_index (bond_sw_if_index);
+  if (!bif)
+    return 1;
+
+  vec_foreach (sw_if_index, bif->slaves)
+  {
+    /* A zeroed record is emitted even if the slave lookup below fails. */
+    vec_add2 (r_slaveifs, slaveif, 1);
+    memset (slaveif, 0, sizeof (*slaveif));
+    sif = bond_get_slave_by_sw_if_index (*sw_if_index);
+    if (sif)
+      {
+        sw = vnet_get_sw_interface (vnm, sif->sw_if_index);
+        hi = vnet_get_hw_interface (vnm, sw->hw_if_index);
+        /*
+         * hi->name is a vppinfra vector and is not guaranteed to be
+         * NUL-terminated, so bound the copy with vec_len (), not
+         * strlen ().
+         */
+        clib_memcpy (slaveif->interface_name, hi->name,
+                     MIN (ARRAY_LEN (slaveif->interface_name) - 1,
+                          vec_len (hi->name)));
+        slaveif->sw_if_index = sif->sw_if_index;
+        slaveif->is_passive = sif->is_passive;
+        slaveif->is_long_timeout = sif->is_long_timeout;
+      }
+  }
+  *out_slaveifs = r_slaveifs;
+
+  return 0;
+}
+
+/*
+ * Detach slave sif from bond bif and release the slave's state.
+ *
+ * Clears the slave's port number in the bond's bitmap, removes it from
+ * the neighbor hash, the bond's slave list and active set, restores the
+ * MAC address saved at enslave time, runs the LACP disable hook (LACP
+ * bonds only) and finally returns sif to the neighbor pool.
+ *
+ * NOTE: this deletes an element from bif->slaves, so callers must not be
+ * iterating that vector directly.
+ */
+static void
+bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  int i;
+  vnet_hw_interface_t *hw;
+
+  bif->port_number_bitmap =
+    clib_bitmap_set (bif->port_number_bitmap,
+                     ntohs (sif->actor_admin.port_number) - 1, 0);
+  hash_unset (bm->neighbor_by_sw_if_index, sif->sw_if_index);
+  vec_free (sif->last_marker_pkt);
+  vec_free (sif->last_rx_pkt);
+  vec_foreach_index (i, bif->slaves)
+  {
+    uword p = *vec_elt_at_index (bif->slaves, i);
+    if (p == sif->sw_if_index)
+      {
+        vec_del1 (bif->slaves, i);
+        break;
+      }
+  }
+
+  bond_disable_collecting_distributing (vm, sif);
+
+  /* Put back the old mac */
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+  vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                        sif->persistent_hw_address);
+
+  /*
+   * The LACP hook receives sif, so it must run before the element is
+   * handed back to the pool.
+   */
+  if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable)
+    (*bm->lacp_enable_disable) (vm, bif, sif, 0);
+
+  pool_put (bm->neighbors, sif);
+}
+
+/*
+ * Delete a bond interface: detach every slave, bring the interface down,
+ * delete the ethernet interface and release the bond's pool entry.
+ *
+ * @param vm - vlib main.
+ * @param sw_if_index - sw_if_index of the bond interface.
+ * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX if the
+ *         interface is not a bond interface.
+ */
+int
+bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_if_t *bif;
+  slave_if_t *sif;
+  vnet_hw_interface_t *hw;
+  u32 *sif_sw_if_index;
+  u32 *slaves_snapshot;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (hw == NULL || bond_dev_class.index != hw->dev_class_index)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  bif = bond_get_master_by_dev_instance (hw->dev_instance);
+
+  /*
+   * bond_delete_neighbor () removes entries from bif->slaves with
+   * vec_del1 (), which moves the last element into the freed slot.
+   * Walking bif->slaves directly would therefore skip slaves, so walk a
+   * private copy instead.
+   */
+  slaves_snapshot = vec_dup (bif->slaves);
+  vec_foreach (sif_sw_if_index, slaves_snapshot)
+  {
+    sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index);
+    if (sif)
+      bond_delete_neighbor (vm, bif, sif);
+  }
+  vec_free (slaves_snapshot);
+
+  /* bring down the interface */
+  vnet_hw_interface_set_flags (vnm, bif->hw_if_index, 0);
+  vnet_sw_interface_set_flags (vnm, bif->sw_if_index, 0);
+
+  ethernet_delete_interface (vnm, bif->hw_if_index);
+
+  clib_bitmap_free (bif->port_number_bitmap);
+  hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index);
+  /*
+   * NOTE(review): the slaves / active_slaves vectors and the
+   * active_slave_by_sw_if_index hash are not freed before this memset --
+   * confirm whether they are released elsewhere.
+   */
+  memset (bif, 0, sizeof (*bif));
+  pool_put (bm->interfaces, bif);
+
+  return 0;
+}
+
+/*
+ * Create a bond interface described by args.
+ *
+ * Inputs read from args: mode, lb, hw_addr_set and hw_addr.
+ * Outputs written to args: on failure rv and error are set; on success
+ * sw_if_index holds the new interface's index (rv is left as the caller
+ * initialized it).  When no MAC was supplied, args->hw_addr is overwritten
+ * with a generated one.
+ */
+void
+bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
+{
+ bond_main_t *bm = &bond_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *sw;
+ bond_if_t *bif;
+
+ /* LACP mode is refused unless the LACP plugin has registered itself. */
+ if ((args->mode == BOND_MODE_LACP) && bm->lacp_plugin_loaded == 0)
+ {
+ args->rv = VNET_API_ERROR_FEATURE_DISABLED;
+ args->error = clib_error_return (0, "LACP plugin is not loaded");
+ return;
+ }
+ if (args->mode > BOND_MODE_LACP || args->mode < BOND_MODE_ROUND_ROBIN)
+ {
+ args->rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ args->error = clib_error_return (0, "Invalid mode");
+ return;
+ }
+ if (args->lb > BOND_LB_L23)
+ {
+ args->rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ args->error = clib_error_return (0, "Invalid load-balance");
+ return;
+ }
+ pool_get (bm->interfaces, bif);
+ memset (bif, 0, sizeof (*bif));
+ /* The device instance is the bond's index in the interface pool. */
+ bif->dev_instance = bif - bm->interfaces;
+ bif->lb = args->lb;
+ bif->mode = args->mode;
+
+ // Special load-balance mode used for rr and bc
+ if (bif->mode == BOND_MODE_ROUND_ROBIN)
+ bif->lb = BOND_LB_RR;
+ else if (bif->mode == BOND_MODE_BROADCAST)
+ bif->lb = BOND_LB_BC;
+
+ bif->use_custom_mac = args->hw_addr_set;
+ if (!args->hw_addr_set)
+ {
+ /* No MAC given: build 02:fe:xx:xx:xx:xx from a time-seeded random. */
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ memcpy (args->hw_addr + 2, &rnd, sizeof (rnd));
+ args->hw_addr[0] = 2;
+ args->hw_addr[1] = 0xfe;
+ }
+ memcpy (bif->hw_address, args->hw_addr, 6);
+ args->error = ethernet_register_interface
+ (vnm, bond_dev_class.index, bif - bm->interfaces /* device instance */ ,
+ bif->hw_address /* ethernet address */ ,
+ &bif->hw_if_index, 0 /* flag change */ );
+
+ if (args->error)
+ {
+ /* Registration failed: give the pool entry back. */
+ args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
+ pool_put (bm->interfaces, bif);
+ return;
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
+ bif->sw_if_index = sw->sw_if_index;
+ bif->group = bif->sw_if_index;
+
+ vnet_hw_interface_set_flags (vnm, bif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ /* Index the bond by sw_if_index for later lookups. */
+ hash_set (bm->bond_by_sw_if_index, bif->sw_if_index, bif->dev_instance);
+
+ // for return
+ args->sw_if_index = bif->sw_if_index;
+}
+
+/*
+ * CLI handler for "create bond": parse mode, optional load-balance
+ * (accepted only after a lacp or xor mode) and optional hw-addr, then
+ * create the bond interface.
+ *
+ * @return 0 on success, otherwise an error describing the bad input or
+ *         the failure reported by bond_create_if ().
+ */
+static clib_error_t *
+bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  bond_create_if_args_t args = { 0 };
+  u8 mode_is_set = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing required arguments.");
+
+  args.mode = -1;
+  args.lb = BOND_LB_L2;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "mode %U", unformat_bond_mode, &args.mode))
+        mode_is_set = 1;
+      else if (((args.mode == BOND_MODE_LACP) || (args.mode == BOND_MODE_XOR))
+               && unformat (line_input, "load-balance %U",
+                            unformat_bond_load_balance, &args.lb))
+        ;
+      else if (unformat (line_input, "hw-addr %U",
+                         unformat_ethernet_address, args.hw_addr))
+        args.hw_addr_set = 1;
+      else
+        {
+          /*
+           * Record the error and fall through to unformat_free () instead
+           * of returning here, which would leak line_input.  The offending
+           * text lives in line_input, so report from there.
+           */
+          args.error = clib_error_return (0, "unknown input `%U'",
+                                          format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (args.error)
+    return args.error;
+  if (mode_is_set == 0)
+    return clib_error_return (0, "Missing bond mode");
+
+  bond_create_if (vm, &args);
+
+  return args.error;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "create bond" is handled by bond_create_command_fn. */
+VLIB_CLI_COMMAND (bond_create_command, static) = {
+ .path = "create bond",
+ .short_help = "create bond mode {round-robin | active-backup | broadcast | "
+ "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>]",
+ .function = bond_create_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "delete bond": accept either an interface name or
+ * "sw_if_index <n>" and delete that bond interface.
+ *
+ * @return 0 on success, otherwise an error describing the bad input or
+ *         the failure reported by bond_delete_if ().
+ */
+static clib_error_t *
+bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing <interface>");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+                         vnm, &sw_if_index))
+        ;
+      else
+        {
+          /*
+           * Record the error and fall through to unformat_free () instead
+           * of returning here, which would leak line_input.  The offending
+           * text lives in line_input, so report from there.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+                              "please specify interface name or sw_if_index");
+
+  rv = bond_delete_if (vm, sw_if_index);
+  if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX)
+    return clib_error_return (0, "not a bond interface");
+  else if (rv != 0)
+    return clib_error_return (0, "error on deleting bond interface");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "delete bond" is handled by bond_delete_command_fn. */
+VLIB_CLI_COMMAND (bond_delete__command, static) =
+{
+ .path = "delete bond",
+ .short_help = "delete bond {<interface> | sw_if_index <sw_idx>}",
+ .function = bond_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Enslave interface args->slave to bond interface args->group.
+ *
+ * Allocates the slave record, saves the slave's current MAC, aligns the
+ * MAC addresses of bond and slave, starts LACP on the slave (LACP bonds
+ * with the hook installed) or makes it active immediately (all other
+ * cases), and enables the "bond-input" feature on the slave.
+ *
+ * On failure args->rv and args->error are set:
+ *   VNET_API_ERROR_INVALID_INTERFACE   - group is not a bond, or slave is
+ *                                        itself a bond interface
+ *   VNET_API_ERROR_VALUE_EXIST         - slave is already enslaved
+ *   VNET_API_ERROR_INVALID_SW_IF_INDEX - slave is not an existing interface
+ *   (feature arc return code)          - enabling bond-input failed
+ */
+void
+bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_if_t *bif;
+  slave_if_t *sif;
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_interface_t *hw, *hw2;
+  vnet_sw_interface_t *sw;
+
+  bif = bond_get_master_by_sw_if_index (args->group);
+  if (!bif)
+    {
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error = clib_error_return (0, "bond interface not found");
+      return;
+    }
+  // make sure the interface is not already enslaved
+  if (bond_get_slave_by_sw_if_index (args->slave))
+    {
+      args->rv = VNET_API_ERROR_VALUE_EXIST;
+      args->error = clib_error_return (0, "interface was already enslaved");
+      return;
+    }
+  /*
+   * args->slave can come straight from an API client; reject an index
+   * that does not name an existing interface before dereferencing it.
+   */
+  if (pool_is_free_index (im->sw_interfaces, args->slave))
+    {
+      args->rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+      args->error = clib_error_return (0, "slave interface not found");
+      return;
+    }
+  hw = vnet_get_sup_hw_interface (vnm, args->slave);
+  if (hw->dev_class_index == bond_dev_class.index)
+    {
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error =
+        clib_error_return (0, "bond interface cannot be enslaved");
+      return;
+    }
+  pool_get (bm->neighbors, sif);
+  memset (sif, 0, sizeof (*sif));
+  clib_spinlock_init (&sif->lockp);
+  sw = pool_elt_at_index (im->sw_interfaces, args->slave);
+  sif->port_enabled = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+  sif->sw_if_index = sw->sw_if_index;
+  sif->hw_if_index = sw->hw_if_index;
+  sif->packet_template_index = (u8) ~ 0;
+  sif->is_passive = args->is_passive;
+  sif->group = args->group;
+  sif->bif_dev_instance = bif->dev_instance;
+  sif->mode = bif->mode;
+
+  sif->is_long_timeout = args->is_long_timeout;
+  if (args->is_long_timeout)
+    sif->ttl_in_seconds = LACP_LONG_TIMOUT_TIME;
+  else
+    sif->ttl_in_seconds = LACP_SHORT_TIMOUT_TIME;
+
+  hash_set (bm->neighbor_by_sw_if_index, sif->sw_if_index,
+            sif - bm->neighbors);
+  vec_add1 (bif->slaves, sif->sw_if_index);
+
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+  /* Save the old mac */
+  memcpy (sif->persistent_hw_address, hw->hw_address, 6);
+  if (bif->use_custom_mac)
+    {
+      vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                            bif->hw_address);
+    }
+  else
+    {
+      // bond interface gets the mac address from the first slave
+      if (vec_len (bif->slaves) == 1)
+        {
+          memcpy (bif->hw_address, hw->hw_address, 6);
+          hw2 = vnet_get_sup_hw_interface (vnm, bif->sw_if_index);
+          vnet_hw_interface_change_mac_address (vnm, hw2->hw_if_index,
+                                                hw->hw_address);
+        }
+      else
+        {
+          // subsequent slaves gets the mac address of the bond interface
+          vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                                bif->hw_address);
+        }
+    }
+
+  if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable)
+    {
+      (*bm->lacp_enable_disable) (vm, bif, sif, 1);
+    }
+  else
+    {
+      bond_enable_collecting_distributing (vm, sif);
+    }
+
+  /*
+   * The feature arc is keyed by sw_if_index, so pass the slave's
+   * sw_if_index rather than its hw_if_index.
+   */
+  args->rv = vnet_feature_enable_disable ("device-input", "bond-input",
+                                          sif->sw_if_index, 1, 0, 0);
+
+  if (args->rv)
+    {
+      args->error =
+        clib_error_return (0,
+                           "Error encountered on input feature arc enable");
+    }
+}
+
+/*
+ * CLI handler for "enslave": parse "interface <slave>", "to <bond>" and
+ * the optional "passive" / "long-timeout" keywords, then call
+ * bond_enslave ().  Returns 0 on success or an error describing the bad
+ * input or the failure reported by bond_enslave ().
+ */
+static clib_error_t *
+enslave_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bond_enslave_args_t args = { 0 };
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "Missing required arguments.");
+
+ /* ~0 marks "not given" for both interfaces; checked after parsing. */
+ args.slave = ~0;
+ args.group = ~0;
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "interface %U",
+ unformat_vnet_sw_interface, vnm, &args.slave))
+ ;
+ else if (unformat (line_input, "to %U", unformat_vnet_sw_interface, vnm,
+ &args.group))
+ ;
+ else if (unformat (line_input, "passive"))
+ args.is_passive = 1;
+ else if (unformat (line_input, "long-timeout"))
+ args.is_long_timeout = 1;
+ else
+ {
+ /* Break rather than return so line_input is freed below. */
+ args.error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (args.error)
+ return args.error;
+ if (args.group == ~0)
+ return clib_error_return (0, "Missing bond interface");
+ if (args.slave == ~0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ bond_enslave (vm, &args);
+
+ return args.error;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "enslave" is handled by enslave_interface_command_fn. */
+VLIB_CLI_COMMAND (enslave_interface_command, static) = {
+ .path = "enslave",
+ .short_help = "enslave interface <interface> to <BondEthernetx> [passive] [long-timeout]",
+ .function = enslave_interface_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Detach interface args->slave from the bond it belongs to.  If the
+ * interface is not enslaved, args->rv is set to
+ * VNET_API_ERROR_INVALID_INTERFACE and args->error describes the problem.
+ */
+void
+bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args)
+{
+  slave_if_t *slave = bond_get_slave_by_sw_if_index (args->slave);
+
+  if (slave != 0)
+    {
+      bond_if_t *bond =
+        bond_get_master_by_dev_instance (slave->bif_dev_instance);
+
+      bond_delete_neighbor (vm, bond, slave);
+      return;
+    }
+
+  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+  args->error = clib_error_return (0, "interface was not enslaved");
+}
+
+/*
+ * CLI handler for "detach": parse "interface <slave>" and call
+ * bond_detach_slave ().  Returns 0 on success or an error describing the
+ * bad input or the failure reported by bond_detach_slave ().
+ */
+static clib_error_t *
+detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ bond_detach_slave_args_t args = { 0 };
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "Missing required arguments.");
+
+ /* ~0 marks "not given"; checked after parsing. */
+ args.slave = ~0;
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "interface %U",
+ unformat_vnet_sw_interface, vnm, &args.slave))
+ ;
+ else
+ {
+ /* Break rather than return so line_input is freed below. */
+ args.error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (args.error)
+ return args.error;
+ if (args.slave == ~0)
+ return clib_error_return (0, "please specify valid interface name");
+
+ bond_detach_slave (vm, &args);
+
+ return args.error;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "detach" is handled by detach_interface_command_fn. */
+VLIB_CLI_COMMAND (detach_interface_command, static) = {
+ .path = "detach",
+ .short_help = "detach interface <interface>",
+ .function = detach_interface_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Print the brief "show bond" table: a header line followed by one row
+ * per bond interface with its name, sw_if_index, mode, load-balance
+ * setting and the active / configured slave counts.
+ */
+static void
+show_bond (vlib_main_t * vm)
+{
+ bond_main_t *bm = &bond_main;
+ bond_if_t *bif;
+
+ vlib_cli_output (vm, "%-16s %-12s %-12s %-13s %-14s %s",
+ "interface name", "sw_if_index", "mode",
+ "load balance", "active slaves", "slaves");
+
+ /* *INDENT-OFF* */
+ pool_foreach (bif, bm->interfaces,
+ ({
+ vlib_cli_output (vm, "%-16U %-12d %-12U %-13U %-14u %u",
+ format_bond_interface_name, bif->dev_instance,
+ bif->sw_if_index, format_bond_mode, bif->mode,
+ format_bond_load_balance, bif->lb,
+ vec_len (bif->active_slaves), vec_len (bif->slaves));
+ }));
+ /* *INDENT-ON* */
+}
+
+/*
+ * Print the "show bond details" report: for every bond interface its
+ * name, mode, load-balance setting, the last transmit slave index
+ * (round-robin bonds only), the names of its active and configured
+ * slaves, and its device instance, sw_if_index and hw_if_index.
+ */
+static void
+show_bond_details (vlib_main_t * vm)
+{
+ bond_main_t *bm = &bond_main;
+ bond_if_t *bif;
+ u32 *sw_if_index;
+
+ /* *INDENT-OFF* */
+ pool_foreach (bif, bm->interfaces,
+ ({
+ vlib_cli_output (vm, "%U", format_bond_interface_name, bif->dev_instance);
+ vlib_cli_output (vm, " mode: %U",
+ format_bond_mode, bif->mode);
+ vlib_cli_output (vm, " load balance: %U",
+ format_bond_load_balance, bif->lb);
+ if (bif->mode == BOND_MODE_ROUND_ROBIN)
+ vlib_cli_output (vm, " last xmit slave index: %u",
+ bif->lb_rr_last_index);
+ vlib_cli_output (vm, " number of active slaves: %d",
+ vec_len (bif->active_slaves));
+ vec_foreach (sw_if_index, bif->active_slaves)
+ {
+ vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name,
+ vnet_get_main (), *sw_if_index);
+ }
+ vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves));
+ vec_foreach (sw_if_index, bif->slaves)
+ {
+ vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name,
+ vnet_get_main (), *sw_if_index);
+ }
+ vlib_cli_output (vm, " device instance: %d", bif->dev_instance);
+ vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index);
+ vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+/*
+ * CLI handler for "show bond [details]": print the brief table by
+ * default, or the detailed report when "details" is given.  Any other
+ * token is rejected with an error before anything is printed.
+ */
+static clib_error_t *
+show_bond_fn (vlib_main_t * vm, unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+  void (*show) (vlib_main_t *) = show_bond;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "details"))
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+
+      show = show_bond_details;
+    }
+
+  show (vm);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "show bond" is handled by show_bond_fn. */
+VLIB_CLI_COMMAND (show_bond_command, static) = {
+ .path = "show bond",
+ .short_help = "show bond [details]",
+ .function = show_bond_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function for the bonding CLI: record the vlib and vnet main
+ * pointers in bond_main and create the neighbor-by-sw_if_index hash.
+ * Always returns 0.
+ */
+clib_error_t *
+bond_cli_init (vlib_main_t * vm)
+{
+  bond_main.vlib_main = vm;
+  bond_main.vnet_main = vnet_get_main ();
+  bond_main.neighbor_by_sw_if_index = hash_create (0, sizeof (uword));
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (bond_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
new file mode 100644
index 00000000000..8f9b3a95591
--- /dev/null
+++ b/src/vnet/bonding/device.c
@@ -0,0 +1,610 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/bonding/node.h>
+
+/*
+ * X-macro list of the bond TX error counters: _(SYMBOL, "description").
+ * Expanded twice below, once for the enum and once for the string table,
+ * so both always stay in the same order.
+ */
+#define foreach_bond_tx_error \
+ _(NONE, "no error") \
+ _(IF_DOWN, "interface down") \
+ _(NO_SLAVE, "no slave")
+
+/* BOND_TX_ERROR_<SYMBOL> codes; BOND_TX_N_ERROR is the number of codes. */
+typedef enum
+{
+#define _(f,s) BOND_TX_ERROR_##f,
+ foreach_bond_tx_error
+#undef _
+ BOND_TX_N_ERROR,
+} bond_tx_error_t;
+
+/* Descriptions of the TX errors, indexed by bond_tx_error_t. */
+static char *bond_tx_error_strings[] = {
+#define _(n,s) s,
+ foreach_bond_tx_error
+#undef _
+};
+
+/*
+ * Trace formatter of the bond TX function.
+ *
+ * Expects (vlib_main_t *, vlib_node_t *, bond_packet_trace_t *) in args and
+ * appends "src <mac>, dst <mac>, <name> -> <name>" to s, where the names are
+ * those of the hardware interfaces behind the trace's sw_if_index and
+ * bond_sw_if_index.  Returns the extended vector.
+ */
+static u8 *
+format_bond_tx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  bond_packet_trace_t *trace = va_arg (*args, bond_packet_trace_t *);
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *first_hw =
+    vnet_get_sup_hw_interface (vnm, trace->sw_if_index);
+  vnet_hw_interface_t *second_hw =
+    vnet_get_sup_hw_interface (vnm, trace->bond_sw_if_index);
+
+  return format (s, "src %U, dst %U, %s -> %s",
+                 format_ethernet_address, trace->ethernet.src_address,
+                 format_ethernet_address, trace->ethernet.dst_address,
+                 first_hw->name, second_hw->name);
+}
+
+/*
+ * Device-name formatter of the bond device class.
+ *
+ * Takes the device instance (u32) from args, looks the bond interface up in
+ * bond_main.interfaces and appends "BondEthernet<instance>" to s.
+ * Returns the extended vector.
+ */
+u8 *
+format_bond_interface_name (u8 * s, va_list * args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  bond_main_t *bm = &bond_main;
+  bond_if_t *bif = pool_elt_at_index (bm->interfaces, dev_instance);
+
+  /*
+   * The instance number is a 32-bit quantity (it is printed with %d by the
+   * "show bond details" CLI), so it must not be formatted with the long
+   * conversion %lu: that makes format () fetch a wider vararg than the one
+   * that was actually passed.
+   */
+  s = format (s, "BondEthernet%u", bif->dev_instance);
+
+  return s;
+}
+
+/*
+ * Sub-interface add/delete hook of the bond device class.
+ * Currently a no-op: every argument is ignored and success (0) is returned.
+ */
+static __clib_unused clib_error_t *
+bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index,
+                             struct vnet_sw_interface_t *st, int is_add)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+/*
+ * Admin up/down hook of the bond device class.
+ *
+ * Records the new admin state in the bond interface.  When the interface is
+ * brought up and already has at least one active slave, the hardware link
+ * flag of the bond interface is raised as well.  Always returns 0.
+ */
+static clib_error_t *
+bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+  bond_if_t *bif = pool_elt_at_index (bm->interfaces, hif->dev_instance);
+  uword going_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0;
+
+  bif->admin_up = going_up;
+
+  if (!going_up)
+    return 0;
+
+  if (vec_len (bif->active_slaves) == 0)
+    return 0;
+
+  vnet_hw_interface_set_flags (vnm, bif->hw_if_index,
+                               VNET_HW_INTERFACE_FLAG_LINK_UP);
+  return 0;
+}
+
+/*
+ * Broadcast "load balance" function.
+ *
+ * Sends a copy of b0 to every active slave except the first one
+ * (indices 1 .. n-1 of bif->active_slaves) and returns 0, so that the
+ * caller transmits the original buffer on the first active slave.
+ *
+ * A slave is silently skipped when the buffer copy fails.  The copy is made
+ * before a frame is requested: a frame obtained from
+ * vnet_get_frame_to_sw_interface () has to be handed back with
+ * vnet_put_frame_to_sw_interface (), so asking for it first and then bailing
+ * out on a failed copy would leak the frame.
+ */
+static inline u32
+bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node,
+                             bond_if_t * bif, vlib_buffer_t * b0)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 i;
+
+  for (i = 1; i < vec_len (bif->active_slaves); i++)
+    {
+      u32 sw_if_index = *vec_elt_at_index (bif->active_slaves, i);
+      vlib_buffer_t *c0 = vlib_buffer_copy (vm, b0);
+      vlib_frame_t *f;
+      u32 *to_next;
+
+      if (PREDICT_FALSE (c0 == 0))
+        continue;
+
+      vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
+
+      f = vnet_get_frame_to_sw_interface (vnm, sw_if_index);
+      to_next = vlib_frame_vector_args (f);
+      to_next[f->n_vectors] = vlib_get_buffer_index (vm, c0);
+      f->n_vectors++;
+      vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
+    }
+
+  return 0;
+}
+
+/*
+ * Layer 2 load balance function.
+ *
+ * Hashes the source and destination MAC addresses found at the current
+ * position of b0 and returns the result modulo the number of active slaves.
+ * Each 6-byte address is folded to 32 bits by XOR-ing its first four bytes
+ * with its last two.  The caller must make sure that bif->active_slaves is
+ * not empty.
+ */
+static inline u32
+bond_load_balance_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u32 src_head, dst_head;
+  u16 src_tail, dst_tail;
+  u32 a, b, c = 0;
+
+  /* memcpy keeps the accesses alignment-safe */
+  memcpy (&src_head, &eth->src_address[0], sizeof (src_head));
+  memcpy (&src_tail, &eth->src_address[4], sizeof (src_tail));
+  memcpy (&dst_head, &eth->dst_address[0], sizeof (dst_head));
+  memcpy (&dst_tail, &eth->dst_address[4], sizeof (dst_tail));
+
+  a = src_head ^ src_tail;
+  b = dst_head ^ dst_tail;
+
+  hash_v3_mix32 (a, b, c);
+  hash_v3_finalize32 (a, b, c);
+
+  return c % vec_len (bif->active_slaves);
+}
+
+/*
+ * Returns a pointer to the ethertype field that describes the payload of
+ * the frame (value left in network byte order):
+ *  - untagged frame: the ethertype of the ethernet header itself;
+ *  - tagged frame: the type field of the first VLAN header, or of the
+ *    second one when the first announces another VLAN header.
+ * At most two tags are skipped.
+ */
+static inline u16 *
+bond_locate_ethertype (ethernet_header_t * eth)
+{
+  ethernet_vlan_header_t *tag;
+
+  if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
+    return &eth->type;
+
+  tag = (void *) (eth + 1);
+  if (tag->type != ntohs (ETHERNET_TYPE_VLAN))
+    return &tag->type;
+
+  /* double tagged: the payload type sits in the inner VLAN header */
+  tag++;
+  return &tag->type;
+}
+
+/*
+ * Layer 2+3 load balance function.
+ *
+ * For IPv4 and IPv6 frames the hash covers the MAC addresses and the IP
+ * source/destination addresses; everything else (including frames whose
+ * ethertype says IP but whose version nibble is neither 4 nor 6) falls back
+ * to the layer 2 hash.  Returns an index into bif->active_slaves, which the
+ * caller must guarantee to be non-empty.
+ *
+ * The IP header is always located right behind the ethertype returned by
+ * bond_locate_ethertype (), so that VLAN tagged frames are handled for both
+ * address families.
+ */
+static inline u32
+bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u8 ip_version;
+  ip4_header_t *ip4;
+  u16 ethertype, *ethertype_p;
+
+  ethertype_p = bond_locate_ethertype (eth);
+  ethertype = *ethertype_p;
+
+  if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+      (ethertype != htons (ETHERNET_TYPE_IP6)))
+    return (bond_load_balance_l2 (vm, node, bif, b0));
+
+  /* The version nibble is in the first byte of both IPv4 and IPv6. */
+  ip4 = (ip4_header_t *) (ethertype_p + 1);
+  ip_version = (ip4->ip_version_and_header_length >> 4);
+
+  if (ip_version == 0x4)
+    {
+      u16 t11, t22;
+      u32 a = 0, b = 0, c = 0, t1, t2;
+
+      /* fold each MAC address to 32 bits: first 4 bytes ^ last 2 bytes */
+      memcpy (&t1, eth->src_address, sizeof (t1));
+      memcpy (&t11, &eth->src_address[4], sizeof (t11));
+      a = t1 ^ t11;
+
+      memcpy (&t2, eth->dst_address, sizeof (t2));
+      memcpy (&t22, &eth->dst_address[4], sizeof (t22));
+      b = t2 ^ t22;
+
+      c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32;
+
+      hash_v3_mix32 (a, b, c);
+      hash_v3_finalize32 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+  else if (ip_version == 0x6)
+    {
+      u64 a, b, c;
+      u64 t1 = 0, t2 = 0;
+      /*
+       * Must start behind the located ethertype, not at (eth + 1):
+       * otherwise VLAN tag bytes would be hashed as IPv6 addresses.
+       */
+      ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1);
+
+      /* t1/t2 are zero initialised; only the 6 MAC bytes are copied in */
+      memcpy (&t1, eth->src_address, sizeof (eth->src_address));
+      memcpy (&t2, eth->dst_address, sizeof (eth->dst_address));
+      a = t1 ^ t2;
+
+      b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]);
+      c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]);
+
+      hash_mix64 (a, b, c);
+      return c % vec_len (bif->active_slaves);
+    }
+  return (bond_load_balance_l2 (vm, node, bif, b0));
+}
+
+/*
+ * Layer 3+4 load balance function.
+ *
+ * For IPv4 and IPv6 frames the hash covers the IP source/destination
+ * addresses and, for TCP and UDP, the source/destination ports.  For IPv6 a
+ * single hop-by-hop extension header in front of TCP/UDP is skipped.
+ * Everything else (including frames whose ethertype says IP but whose
+ * version nibble is neither 4 nor 6) falls back to the layer 2 hash.
+ * Returns an index into bif->active_slaves, which the caller must guarantee
+ * to be non-empty.
+ *
+ * The IP header is always located right behind the ethertype returned by
+ * bond_locate_ethertype (), so that VLAN tagged frames are handled for both
+ * address families.
+ *
+ * NOTE(review): the IPv4 branch assumes a 20-byte header (ip4 + 1); packets
+ * carrying IP options would have their ports read from the wrong offset.
+ */
+static inline u32
+bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u8 ip_version;
+  uword is_tcp_udp = 0;
+  ip4_header_t *ip4;
+  u16 ethertype, *ethertype_p;
+
+  ethertype_p = bond_locate_ethertype (eth);
+  ethertype = *ethertype_p;
+
+  if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+      (ethertype != htons (ETHERNET_TYPE_IP6)))
+    return (bond_load_balance_l2 (vm, node, bif, b0));
+
+  /* The version nibble is in the first byte of both IPv4 and IPv6. */
+  ip4 = (ip4_header_t *) (ethertype_p + 1);
+  ip_version = (ip4->ip_version_and_header_length >> 4);
+
+  if (ip_version == 0x4)
+    {
+      u32 a = 0, b = 0, c = 0, t1, t2;
+      tcp_header_t *tcp = (void *) (ip4 + 1);
+      is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
+        (ip4->protocol == IP_PROTOCOL_UDP);
+
+      a = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32;
+
+      /* the ports are only meaningful for TCP and UDP */
+      t1 = is_tcp_udp ? tcp->src : 0;
+      t2 = is_tcp_udp ? tcp->dst : 0;
+      b = t1 + (t2 << 16);
+
+      hash_v3_mix32 (a, b, c);
+      hash_v3_finalize32 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+  else if (ip_version == 0x6)
+    {
+      u64 a, b, c;
+      u64 t1, t2;
+      /*
+       * Must start behind the located ethertype, not at (eth + 1):
+       * otherwise VLAN tag bytes would be hashed as the IPv6 header.
+       */
+      ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1);
+      tcp_header_t *tcp = (void *) (ip6 + 1);
+
+      if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
+                        (ip6->protocol == IP_PROTOCOL_UDP)))
+        {
+          is_tcp_udp = 1;
+          tcp = (void *) (ip6 + 1);
+        }
+      else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+        {
+          ip6_hop_by_hop_header_t *hbh =
+            (ip6_hop_by_hop_header_t *) (ip6 + 1);
+          if ((hbh->protocol == IP_PROTOCOL_TCP)
+              || (hbh->protocol == IP_PROTOCOL_UDP))
+            {
+              is_tcp_udp = 1;
+              /* extension header length is in 8-byte units, minus one */
+              tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+            }
+        }
+      a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]);
+      b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]);
+
+      t1 = is_tcp_udp ? tcp->src : 0;
+      t2 = is_tcp_udp ? tcp->dst : 0;
+      c = (t2 << 16) | t1;
+      hash_mix64 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+
+  return (bond_load_balance_l2 (vm, node, bif, b0));
+}
+
+/*
+ * Round-robin load balance function.
+ *
+ * Advances bif->lb_rr_last_index by one, wraps it at the number of active
+ * slaves and returns the new value.  The packet itself is not inspected.
+ * The caller must make sure that bif->active_slaves is not empty.
+ */
+static inline u32
+bond_load_balance_round_robin (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               bond_if_t * bif, vlib_buffer_t * b0)
+{
+  bif->lb_rr_last_index = bif->lb_rr_last_index + 1;
+  bif->lb_rr_last_index =
+    bif->lb_rr_last_index % vec_len (bif->active_slaves);
+
+  return bif->lb_rr_last_index;
+}
+
+/*
+ * Active-backup load balance function.
+ *
+ * Always selects index 0: the first entry of the active slave vector is the
+ * active interface, all others are backups.
+ */
+static inline u32
+bond_load_balance_active_backup (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 bond_if_t * bif, vlib_buffer_t * b0)
+{
+  const u32 active_slave_index = 0;
+
+  return active_slave_index;
+}
+
+/*
+ * Table of load-balance functions, one entry per line of
+ * foreach_bond_lb_algo and in the same order.  The TX function indexes it
+ * with bif->lb, so the entry order must match the numeric values assigned
+ * by that macro.
+ */
+static bond_load_balance_func_t bond_load_balance_table[] = {
+#define _(v,f,s, p) { bond_load_balance_##p },
+ foreach_bond_lb_algo
+#undef _
+};
+
+/*
+ * TX function of the bond device class.
+ *
+ * - If the bond interface is admin down, or has no active slave, every
+ * buffer of the frame is freed and the drop counter plus the matching
+ * BOND_TX_ERROR_* counter are incremented.
+ * - Otherwise the load-balance function selected by bif->lb returns, for
+ * each buffer, an index into bif->active_slaves.  The buffer's VLIB_TX
+ * sw_if_index is rewritten to that slave and the buffer is enqueued in a
+ * frame obtained from vnet_get_frame_to_sw_interface ().
+ *
+ * Buffers are handled four at a time while at least eight remain (the next
+ * four are prefetched), then one at a time.
+ *
+ * Returns the number of buffers of the incoming frame.
+ */
+static uword
+bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
+ bond_main_t *bm = &bond_main;
+ bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance);
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from;
+ ethernet_header_t *eth;
+ /* next0..next3 stay 0; they are only passed to vlib_trace_buffer () */
+ u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
+ u32 port, port1, port2, port3;
+ u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+ bond_packet_trace_t *t0;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u16 thread_index = vlib_get_thread_index ();
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 *to_next, *to_next1, *to_next2, *to_next3;
+ u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3;
+ vlib_frame_t *f, *f1, *f2, *f3;
+
+ /* Bond interface is admin down: drop the whole frame. */
+ if (PREDICT_FALSE (bif->admin_up == 0))
+ {
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ thread_index, bif->sw_if_index,
+ frame->n_vectors);
+ vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN,
+ frame->n_vectors);
+ return frame->n_vectors;
+ }
+
+ /* No active slave to transmit on: drop the whole frame. */
+ if (PREDICT_FALSE (vec_len (bif->active_slaves) == 0))
+ {
+ /*
+ * NOTE(review): the TX combined counter is incremented although the
+ * packets are dropped right below, and the byte count passed is the
+ * current_length of the first buffer only -- confirm this is intended.
+ */
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX, thread_index, bif->sw_if_index,
+ frame->n_vectors, b0->current_length);
+
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ thread_index, bif->sw_if_index,
+ frame->n_vectors);
+ vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE,
+ frame->n_vectors);
+ return frame->n_vectors;
+ }
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /*
+ * Quad loop: handles from[0..3] and prefetches from[4..7], hence the
+ * requirement of at least 8 remaining buffers.
+ */
+ while (n_left_from >= 8)
+ {
+ // Prefetch next iteration
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+ vlib_prefetch_buffer_header (p6, STORE);
+ vlib_prefetch_buffer_header (p7, STORE);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ bi2 = from[2];
+ bi3 = from[3];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+ /* Remember the original TX interface; it is only used for tracing. */
+ sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+ sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+ /* Pick an index into bif->active_slaves for each buffer. */
+ port =
+ (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0);
+ port1 =
+ (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b1);
+ port2 =
+ (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b2);
+ port3 =
+ (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b3);
+
+ sif_if_index = *vec_elt_at_index (bif->active_slaves, port);
+ sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1);
+ sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2);
+ sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3);
+
+ /* Redirect the buffers to the selected slave interfaces. */
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = sif_if_index1;
+ vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2;
+ vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3;
+
+ /* One frame is requested per buffer and handed back right below. */
+ f = vnet_get_frame_to_sw_interface (vnm, sif_if_index);
+ f1 = vnet_get_frame_to_sw_interface (vnm, sif_if_index1);
+ f2 = vnet_get_frame_to_sw_interface (vnm, sif_if_index2);
+ f3 = vnet_get_frame_to_sw_interface (vnm, sif_if_index3);
+
+ to_next = vlib_frame_vector_args (f);
+ to_next1 = vlib_frame_vector_args (f1);
+ to_next2 = vlib_frame_vector_args (f2);
+ to_next3 = vlib_frame_vector_args (f3);
+
+ to_next += f->n_vectors;
+ to_next1 += f1->n_vectors;
+ to_next2 += f2->n_vectors;
+ to_next3 += f3->n_vectors;
+
+ to_next[0] = vlib_get_buffer_index (vm, b0);
+ to_next1[0] = vlib_get_buffer_index (vm, b1);
+ to_next2[0] = vlib_get_buffer_index (vm, b2);
+ to_next3[0] = vlib_get_buffer_index (vm, b3);
+
+ f->n_vectors++;
+ f1->n_vectors++;
+ f2->n_vectors++;
+ f3->n_vectors++;
+
+ vnet_put_frame_to_sw_interface (vnm, sif_if_index, f);
+ vnet_put_frame_to_sw_interface (vnm, sif_if_index1, f1);
+ vnet_put_frame_to_sw_interface (vnm, sif_if_index2, f2);
+ vnet_put_frame_to_sw_interface (vnm, sif_if_index3, f3);
+
+ /*
+ * Trace b0..b3 in order for as long as the trace budget lasts.  In the
+ * trace record sw_if_index is the TX interface the buffer arrived with
+ * and bond_sw_if_index the slave it was redirected to.
+ */
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+ t0->ethernet = *eth;
+ t0->sw_if_index = sw_if_index;
+ t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_TRUE (n_trace > 0))
+ {
+ vlib_trace_buffer (vm, node, next1, b1, 0 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b1, sizeof (*t0));
+ eth = (ethernet_header_t *) vlib_buffer_get_current (b1);
+ t0->ethernet = *eth;
+ t0->sw_if_index = sw_if_index1;
+ t0->bond_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_TRUE (n_trace > 0))
+ {
+ vlib_trace_buffer (vm, node, next2, b2,
+ 0 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b2, sizeof (*t0));
+ eth = (ethernet_header_t *) vlib_buffer_get_current (b2);
+ t0->ethernet = *eth;
+ t0->sw_if_index = sw_if_index2;
+ t0->bond_sw_if_index =
+ vnet_buffer (b2)->sw_if_index[VLIB_TX];
+
+ if (PREDICT_TRUE (n_trace > 0))
+ {
+ vlib_trace_buffer (vm, node, next3, b3,
+ 0 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b3, sizeof (*t0));
+ eth =
+ (ethernet_header_t *) vlib_buffer_get_current (b3);
+ t0->ethernet = *eth;
+ t0->sw_if_index = sw_if_index3;
+ t0->bond_sw_if_index =
+ vnet_buffer (b3)->sw_if_index[VLIB_TX];
+ }
+ }
+ }
+ }
+
+ from += 4;
+ n_left_from -= 4;
+ }
+
+ /* Single loop: same processing as above, one buffer per iteration. */
+ while (n_left_from > 0)
+ {
+ // Prefetch next iteration
+ if (n_left_from > 1)
+ {
+ vlib_buffer_t *p2;
+
+ p2 = vlib_get_buffer (vm, from[1]);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+ sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ port =
+ (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0);
+ sif_if_index = *vec_elt_at_index (bif->active_slaves, port);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index;
+ f = vnet_get_frame_to_sw_interface (vnm, sif_if_index);
+ to_next = vlib_frame_vector_args (f);
+ to_next += f->n_vectors;
+
+ to_next[0] = vlib_get_buffer_index (vm, b0);
+ f->n_vectors++;
+ vnet_put_frame_to_sw_interface (vnm, sif_if_index, f);
+
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
+ vlib_set_trace_count (vm, node, --n_trace);
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+ t0->ethernet = *eth;
+ t0->sw_if_index = sw_if_index;
+ t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ }
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ /*
+ * NOTE(review): this indexes the simple-counter array (sw_if_counters)
+ * with VNET_INTERFACE_COUNTER_TX, whereas the no-slave path above uses the
+ * same index with the combined-counter array -- confirm which array that
+ * index belongs to.
+ */
+ vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters
+ + VNET_INTERFACE_COUNTER_TX, thread_index,
+ bif->sw_if_index, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Device class of bond interfaces: wires the TX function, its error
+ * counters and the name/trace formatters and admin/sub-interface hooks
+ * defined in this file.
+ */
+VNET_DEVICE_CLASS (bond_dev_class) = {
+ .name = "bond",
+ .tx_function = bond_tx_fn,
+ .tx_function_n_errors = BOND_TX_N_ERROR,
+ .tx_function_error_strings = bond_tx_error_strings,
+ .format_device_name = format_bond_interface_name,
+ .admin_up_down_function = bond_interface_admin_up_down,
+ .subif_add_del_function = bond_subif_add_del_function,
+ .format_tx_trace = format_bond_tx_trace,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (bond_dev_class, bond_tx_fn)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
new file mode 100644
index 00000000000..4deec829195
--- /dev/null
+++ b/src/vnet/bonding/node.c
@@ -0,0 +1,509 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vnet/llc/llc.h>
+#include <vnet/snap/snap.h>
+#include <vnet/bonding/node.h>
+
+/* Global state of the bonding driver (the one definition of bond_main). */
+bond_main_t bond_main;
+
+/*
+ * X-macro list of the bond-input counters: _(SYMBOL, "description").
+ * Expanded twice below, once for the enum and once for the string table,
+ * so both always stay in the same order.
+ */
+#define foreach_bond_input_error \
+ _(NONE, "no error") \
+ _(IF_DOWN, "interface down") \
+ _(NO_SLAVE, "no slave") \
+ _(NO_BOND, "no bond interface")\
+ _(PASS_THRU, "pass through")
+
+/* BOND_INPUT_ERROR_<SYMBOL> codes; BOND_INPUT_N_ERROR is their number. */
+typedef enum
+{
+#define _(f,s) BOND_INPUT_ERROR_##f,
+ foreach_bond_input_error
+#undef _
+ BOND_INPUT_N_ERROR,
+} bond_input_error_t;
+
+/* Descriptions of the input counters, indexed by bond_input_error_t. */
+static char *bond_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_bond_input_error
+#undef _
+};
+
+/*
+ * Trace formatter of the bond-input node.
+ *
+ * Expects (vlib_main_t *, vlib_node_t *, bond_packet_trace_t *) in args and
+ * appends "src <mac>, dst <mac>, <name> -> <name>" to s, where the names are
+ * those of the hardware interfaces behind the trace's sw_if_index and
+ * bond_sw_if_index.  Returns the extended vector.
+ */
+static u8 *
+format_bond_input_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  bond_packet_trace_t *trace = va_arg (*args, bond_packet_trace_t *);
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *first_hw =
+    vnet_get_sup_hw_interface (vnm, trace->sw_if_index);
+  vnet_hw_interface_t *second_hw =
+    vnet_get_sup_hw_interface (vnm, trace->bond_sw_if_index);
+
+  return format (s, "src %U, dst %U, %s -> %s",
+                 format_ethernet_address, trace->ethernet.src_address,
+                 format_ethernet_address, trace->ethernet.dst_address,
+                 first_hw->name, second_hw->name);
+}
+
+/*
+ * Returns 1 when the untagged frame is a CDP packet, 0 otherwise.
+ *
+ * A frame qualifies either through the CDP ethertype, or through an
+ * LLC/SNAP encapsulation directly behind the ethernet header with
+ * source SAP 0xAA, control 0x03, OUI 00:00:0C and SNAP protocol 0x2000.
+ */
+static_always_inline u8
+packet_is_cdp (ethernet_header_t * eth)
+{
+  llc_header_t *llc = (llc_header_t *) (eth + 1);
+  snap_header_t *snap = (snap_header_t *) (llc + 1);
+
+  if (eth->type == htons (ETHERNET_TYPE_CDP))
+    return 1;
+
+  if ((llc->src_sap != 0xAA) || (llc->control != 0x03))
+    return 0;
+
+  if (snap->protocol != htons (0x2000))
+    return 0;
+
+  if ((snap->oui[0] != 0) || (snap->oui[1] != 0) || (snap->oui[2] != 0x0C))
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Re-attributes a packet received on a slave interface to its bond
+ * interface.
+ *
+ * When sif belongs to an admin-up bond interface that has at least one
+ * slave, the buffer's VLIB_RX sw_if_index is replaced by the bond's
+ * sw_if_index and the bond's RX counter is incremented.  Slow-protocol, CDP
+ * and LLDP frames are left untouched (counted as PASS_THRU); for VLAN
+ * tagged frames the decision is taken on the ethertype behind up to two
+ * tags.
+ *
+ * Nothing is dropped here; failures are only counted:
+ *   sif == 0 or bond without slaves -> NO_SLAVE
+ *   no bond found for sif->group    -> NO_BOND
+ *   bond admin down                 -> IF_DOWN
+ */
+static inline void
+bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
+                          slave_if_t * sif, ethernet_header_t * eth,
+                          vlib_buffer_t * b0)
+{
+  bond_if_t *bif;
+  u16 thread_index = vlib_get_thread_index ();
+  uword is_l2_control;
+
+  if (PREDICT_FALSE (sif == 0))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
+      return;
+    }
+
+  bif = bond_get_master_by_sw_if_index (sif->group);
+  if (PREDICT_FALSE (bif == 0))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_BOND, 1);
+      return;
+    }
+
+  if (PREDICT_FALSE (vec_len (bif->slaves) < 1))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
+      return;
+    }
+
+  if (PREDICT_FALSE (bif->admin_up != 1))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_IF_DOWN, 1);
+      return;
+    }
+
+  /* Some layer2 control protocols must stay on the physical interface. */
+  if (!ethernet_frame_is_tagged (ntohs (eth->type)))
+    {
+      is_l2_control =
+        (eth->type == htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
+        || packet_is_cdp (eth)
+        || (eth->type == htons (ETHERNET_TYPE_802_1_LLDP));
+    }
+  else
+    {
+      ethernet_vlan_header_t *tag = (void *) (eth + 1);
+      u16 payload_type;
+
+      /* double tagged: look at the inner VLAN header */
+      if (tag->type == ntohs (ETHERNET_TYPE_VLAN))
+        tag++;
+      payload_type = tag->type;
+
+      is_l2_control =
+        (payload_type == htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
+        || (payload_type == htons (ETHERNET_TYPE_CDP))
+        || (payload_type == htons (ETHERNET_TYPE_802_1_LLDP));
+    }
+
+  if (PREDICT_FALSE (is_l2_control))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_PASS_THRU, 1);
+      return;
+    }
+
+  /* Change the physical interface to the bond interface. */
+  vnet_buffer (b0)->sw_if_index[VLIB_RX] = bif->sw_if_index;
+
+  /* increase rx counters */
+  vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+                                 VNET_INTERFACE_COUNTER_RX, thread_index,
+                                 bif->sw_if_index, 1);
+}
+
+/*
+ * Adds one bond-input trace record for buffer b if the trace budget
+ * (*n_trace) is not exhausted, and consumes one unit of the budget.
+ *
+ * rx_sw_if_index is the interface the packet was received on; the record's
+ * bond_sw_if_index is read from the buffer, i.e. it reflects the rewrite
+ * done by bond_sw_if_index_rewrite ().
+ */
+static_always_inline void
+bond_input_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node,
+                         vlib_buffer_t * b, u32 next,
+                         ethernet_header_t * eth, u32 rx_sw_if_index,
+                         uword * n_trace)
+{
+  bond_packet_trace_t *t;
+
+  if (PREDICT_TRUE (*n_trace == 0))
+    return;
+
+  vlib_trace_buffer (vm, node, next, b, 0 /* follow_chain */ );
+  *n_trace -= 1;
+  vlib_set_trace_count (vm, node, *n_trace);
+  t = vlib_add_trace (vm, node, b, sizeof (*t));
+  t->ethernet = *eth;
+  t->sw_if_index = rx_sw_if_index;
+  t->bond_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+}
+
+/*
+ * bond-input node function.
+ *
+ * For every buffer of the frame:
+ *  - the next node is taken from the feature arc (vnet_feature_next);
+ *  - the slave is looked up from the buffer's RX sw_if_index and
+ *    bond_sw_if_index_rewrite () re-attributes the packet to the bond
+ *    interface where applicable;
+ *  - a trace record is added while the node's trace budget lasts.
+ *
+ * Buffers are handled four at a time while at least twelve remain (the
+ * following four are prefetched), then one at a time.  Every buffer gets
+ * traced with its own next index, in both loops.
+ *
+ * Returns the number of buffers of the incoming frame.
+ */
+static uword
+bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+               vlib_frame_t * frame)
+{
+  u32 bi0, bi1, bi2, bi3;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  u32 next_index;
+  u32 *from, *to_next, n_left_from, n_left_to_next;
+  ethernet_header_t *eth, *eth1, *eth2, *eth3;
+  u32 next0, next1, next2, next3;
+  uword n_trace = vlib_get_trace_count (vm, node);
+  u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+  slave_if_t *sif, *sif1, *sif2, *sif3;
+  u16 thread_index = vlib_get_thread_index ();
+
+  /* Vector of buffer / pkt indices we're supposed to process */
+  from = vlib_frame_vector_args (frame);
+
+  /* Number of buffers / pkts */
+  n_left_from = frame->n_vectors;
+
+  /* Speculatively send the first buffer to the last disposition we used */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      /* set up to enqueue to our disposition with index = next_index */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 12 && n_left_to_next >= 4)
+        {
+          /* Prefetch next iteration */
+          {
+            vlib_buffer_t *b4, *b5, *b6, *b7;
+
+            b4 = vlib_get_buffer (vm, from[4]);
+            b5 = vlib_get_buffer (vm, from[5]);
+            b6 = vlib_get_buffer (vm, from[6]);
+            b7 = vlib_get_buffer (vm, from[7]);
+
+            vlib_prefetch_buffer_header (b4, STORE);
+            vlib_prefetch_buffer_header (b5, STORE);
+            vlib_prefetch_buffer_header (b6, STORE);
+            vlib_prefetch_buffer_header (b7, STORE);
+
+            CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD);
+          }
+
+          next0 = 0;
+          next1 = 0;
+          next2 = 0;
+          next3 = 0;
+
+          bi0 = from[0];
+          bi1 = from[1];
+          bi2 = from[2];
+          bi3 = from[3];
+
+          to_next[0] = bi0;
+          to_next[1] = bi1;
+          to_next[2] = bi2;
+          to_next[3] = bi3;
+
+          from += 4;
+          to_next += 4;
+          n_left_from -= 4;
+          n_left_to_next -= 4;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+          b2 = vlib_get_buffer (vm, bi2);
+          b3 = vlib_get_buffer (vm, bi3);
+
+          vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
+                             b0);
+          vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1,
+                             b1);
+          vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2,
+                             b2);
+          vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3,
+                             b3);
+
+          eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+          eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1);
+          eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2);
+          eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3);
+
+          sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+          sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+          sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+          /* sw_if_index points to the physical interface */
+          sif = bond_get_slave_by_sw_if_index (sw_if_index);
+          sif1 = bond_get_slave_by_sw_if_index (sw_if_index1);
+          sif2 = bond_get_slave_by_sw_if_index (sw_if_index2);
+          sif3 = bond_get_slave_by_sw_if_index (sw_if_index3);
+
+          bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
+          bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1);
+          bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2);
+          bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3);
+
+          if (PREDICT_FALSE (n_trace > 0))
+            {
+              /*
+               * Each buffer is traced with its own next index and its own
+               * buffer pointer; the helper stops once the budget is used.
+               */
+              bond_input_trace_buffer (vm, node, b0, next0, eth,
+                                       sw_if_index, &n_trace);
+              bond_input_trace_buffer (vm, node, b1, next1, eth1,
+                                       sw_if_index1, &n_trace);
+              bond_input_trace_buffer (vm, node, b2, next2, eth2,
+                                       sw_if_index2, &n_trace);
+              bond_input_trace_buffer (vm, node, b3, next3, eth3,
+                                       sw_if_index3, &n_trace);
+            }
+
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, bi2, bi3, next0, next1,
+                                           next2, next3);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          /* Prefetch next iteration */
+          if (n_left_from > 1)
+            {
+              vlib_buffer_t *p2;
+
+              p2 = vlib_get_buffer (vm, from[1]);
+              vlib_prefetch_buffer_header (p2, STORE);
+              CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+            }
+
+          next0 = 0;
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
+                             b0);
+
+          eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+
+          sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          /* sw_if_index points to the physical interface */
+          sif = bond_get_slave_by_sw_if_index (sw_if_index);
+          bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
+
+          /* small frames only ever reach this loop, so trace here too */
+          if (PREDICT_FALSE (n_trace > 0))
+            bond_input_trace_buffer (vm, node, b0, next0, eth,
+                                     sw_if_index, &n_trace);
+
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, bond_input_node.index,
+                               BOND_INPUT_ERROR_NONE, frame->n_vectors);
+
+  vnet_device_increment_rx_packets (thread_index, frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/*
+ * Init hook of the bond-input node.  There is nothing to set up, so it
+ * simply reports success (0).
+ */
+static clib_error_t *
+bond_input_init (vlib_main_t * vm)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registration of the "bond-input" graph node.
+ *
+ * NOTE(review): n_next_nodes is 0 although next_nodes[0] names
+ * "error-drop"; bond_input_fn takes its next index from
+ * vnet_feature_next () -- confirm whether the static entry is meant to be
+ * registered.
+ */
+VLIB_REGISTER_NODE (bond_input_node) = {
+ .function = bond_input_fn,
+ .name = "bond-input",
+ .vector_size = sizeof (u32),
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_bond_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = BOND_INPUT_N_ERROR,
+ .error_strings = bond_input_error_strings,
+ .n_next_nodes = 0,
+ .next_nodes =
+ {
+ [0] = "error-drop"
+ }
+};
+
+VLIB_INIT_FUNCTION (bond_input_init);
+
+/* Hooks bond-input into the "device-input" arc, ahead of ethernet-input. */
+VNET_FEATURE_INIT (bond_input, static) =
+{
+ .arc_name = "device-input",
+ .node_name = "bond-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn)
+/* *INDENT-ON* */
+
+/*
+ * Software interface admin up/down callback.
+ *
+ * Interfaces that are not bond slaves are ignored.  For a slave the new
+ * admin state is stored in port_enabled and, unless LACP is enabled on the
+ * slave, collecting/distributing is switched on or off to match.
+ * Always returns 0.
+ */
+static clib_error_t *
+bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  vlib_main_t *vm = bond_main.vlib_main;
+  slave_if_t *sif = bond_get_slave_by_sw_if_index (sw_if_index);
+
+  if (sif == 0)
+    return 0;
+
+  sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  /* with LACP enabled nothing is switched from this callback */
+  if (sif->lacp_enabled != 0)
+    return 0;
+
+  if (sif->port_enabled != 0)
+    bond_enable_collecting_distributing (vm, sif);
+  else
+    bond_disable_collecting_distributing (vm, sif);
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);
+
+/*
+ * Hardware interface link up/down callback.
+ *
+ * Interfaces that are not bond slaves are ignored.  For a slave without
+ * LACP, collecting/distributing is enabled when the link comes up and
+ * disabled when it goes down.  Always returns 0.
+ *
+ * The slave is looked up through the sw_if_index of the hardware
+ * interface.  hw_if_index must not be used as an index into the software
+ * interface pool: hardware and software interface indices are separate
+ * index spaces.
+ */
+static clib_error_t *
+bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  bond_main_t *bm = &bond_main;
+  slave_if_t *sif;
+  vnet_hw_interface_t *hw;
+  vlib_main_t *vm = bm->vlib_main;
+
+  hw = vnet_get_hw_interface (vnm, hw_if_index);
+  sif = bond_get_slave_by_sw_if_index (hw->sw_if_index);
+  if (sif)
+    {
+      if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+        {
+          if (sif->lacp_enabled == 0)
+            {
+              bond_disable_collecting_distributing (vm, sif);
+            }
+        }
+      else
+        {
+          if (sif->lacp_enabled == 0)
+            {
+              bond_enable_collecting_distributing (vm, sif);
+            }
+        }
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
new file mode 100644
index 00000000000..74f3b1a356a
--- /dev/null
+++ b/src/vnet/bonding/node.h
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vnet_bonding_node_h__
+#define __included_vnet_bonding_node_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface.h>
+
+/*
+ * LACP timer constants (presumably in seconds -- TODO confirm against the
+ * users of these macros).  Each timeout is three times the matching
+ * periodic timer.
+ */
+#define LACP_FAST_PERIODIC_TIMER 1.0
+#define LACP_SHORT_TIMOUT_TIME (LACP_FAST_PERIODIC_TIMER * 3)
+#define LACP_SLOW_PERIODIC_TIMER 30.0
+#define LACP_LONG_TIMOUT_TIME (LACP_SLOW_PERIODIC_TIMER * 3)
+
+/*
+ * Smaller of x and y.  Defined only if no MIN exists already.  Each
+ * argument can be evaluated twice, so avoid arguments with side effects.
+ */
+#ifndef MIN
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#endif
+
+/*
+ * X-macro table of bond modes: _ (numeric value, enum suffix, string).
+ * Expanded in this header to build bond_mode_t and by
+ * unformat_bond_mode () / format_bond_mode ().
+ */
+#define foreach_bond_mode \
+ _ (1, ROUND_ROBIN, "round-robin") \
+ _ (2, ACTIVE_BACKUP, "active-backup") \
+ _ (3, XOR, "xor") \
+ _ (4, BROADCAST, "broadcast") \
+ _ (5, LACP, "lacp")
+
+/* Bond mode enumerators BOND_MODE_<suffix>, one per table entry. */
+typedef enum
+{
+#define _(v, f, s) BOND_MODE_##f = v,
+ foreach_bond_mode
+#undef _
+} bond_mode_t;
+
+/*
+ * configurable load-balances
+ *
+ * X-macro table: _ (numeric value, enum suffix, string, identifier).
+ * This is the list parsed by unformat_bond_load_balance (); entries are
+ * tried in the order written here.
+ */
+#define foreach_bond_lb \
+ _ (2, L23, "l23", l23) \
+ _ (1, l34 , "l34", l34) \
+ _ (0, L2, "l2", l2)
+
+/*
+ * load-balance functions implemented in bond-output
+ *
+ * Same column layout as foreach_bond_lb.  Used in this header to build
+ * bond_load_balance_t and by format_bond_load_balance ().  The fourth
+ * column is not used by any expansion in this header (presumably a
+ * function-name suffix used elsewhere -- TODO confirm).
+ *
+ * NOTE(review): the lowercase suffix "l34" yields the enumerator
+ * BOND_LB_l34, unlike the upper-case BOND_LB_L2 / BOND_LB_L23.
+ */
+#define foreach_bond_lb_algo \
+ _ (0, L2, "l2", l2) \
+ _ (1, l34 , "l34", l34) \
+ _ (2, L23, "l23", l23) \
+ _ (3, RR, "round-robin", round_robin) \
+ _ (4, BC, "broadcast", broadcast) \
+ _ (5, AB, "active-backup", active_backup)
+
+/* Load-balance enumerators BOND_LB_<suffix>, one per algo table entry. */
+typedef enum
+{
+#define _(v, f, s, p) BOND_LB_##f = v,
+ foreach_bond_lb_algo
+#undef _
+} bond_load_balance_t;
+
+/*
+ * Argument block for bond_create_if ().  Fields above the "return" marker
+ * are inputs; the fields below it are marked as results.
+ */
+typedef struct
+{
+ /* presumably non-zero when hw_addr holds a caller-supplied MAC -- TODO confirm */
+ u8 hw_addr_set;
+ u8 hw_addr[6];
+ /* presumably a bond_mode_t value -- TODO confirm */
+ u8 mode;
+ /* presumably a bond_load_balance_t value -- TODO confirm */
+ u8 lb;
+ /* return */
+ u32 sw_if_index;
+ int rv;
+ clib_error_t *error;
+} bond_create_if_args_t;
+
+/*
+ * Argument block for bond_enslave ().  Fields above the "return" marker
+ * are inputs; the fields below it are marked as results.
+ */
+typedef struct
+{
+ /* slave's sw_if_index */
+ u32 slave;
+ /* bond's sw_if_index */
+ u32 group;
+ u8 is_passive;
+ u8 is_long_timeout;
+ /* return */
+ int rv;
+ clib_error_t *error;
+} bond_enslave_args_t;
+
+/*
+ * Argument block for bond_detach_slave ().  "slave" is the input
+ * (presumably the slave's sw_if_index, as in bond_enslave_args_t -- TODO
+ * confirm); the fields below the "return" marker are marked as results.
+ */
+typedef struct
+{
+ u32 slave;
+ /* return */
+ int rv;
+ clib_error_t *error;
+} bond_detach_slave_args_t;
+
+/**
+ * BOND interface details struct
+ *
+ * Element type of the output vector taken by bond_dump_ifs ().
+ */
+typedef struct
+{
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u8 mode;
+ u8 lb;
+ /* presumably a count of active slaves, not a vector -- TODO confirm */
+ u32 active_slaves;
+ /* presumably a count of configured slaves -- TODO confirm */
+ u32 slaves;
+} bond_interface_details_t;
+
+/**
+ * slave interface details struct
+ *
+ * Element type of the output vector taken by bond_dump_slave_ifs ().
+ */
+typedef struct
+{
+ u32 sw_if_index;
+ u8 interface_name[64];
+ u8 is_passive;
+ u8 is_long_timeout;
+ u32 active_slaves;
+} slave_interface_details_t;
+
+/*
+ * Packed LACP port information record.  Used for the actor and partner
+ * fields of bond_if_t and slave_if_t.
+ */
+typedef CLIB_PACKED (struct
+ {
+ u16 system_priority;
+ u8 system[6];
+ u16 key; u16 port_priority; u16 port_number;
+ u8 state;
+ }) lacp_port_info_t;
+
+/*
+ * State kept for one bond interface.  Instances live in the
+ * bond_main_t.interfaces pool.
+ */
+typedef struct
+{
+ u8 admin_up;
+ u8 mode;
+ u8 lb;
+
+ /* the last slave index for the rr lb */
+ u32 lb_rr_last_index;
+
+ u32 dev_instance;
+ u32 hw_if_index;
+ u32 sw_if_index;
+
+ /* Configured slaves */
+ u32 *slaves;
+
+ /* Slaves that are in DISTRIBUTING state */
+ u32 *active_slaves;
+
+ /* rapidly find an active slave */
+ uword *active_slave_by_sw_if_index;
+
+ lacp_port_info_t partner;
+ lacp_port_info_t actor;
+ u8 individual_aggregator;
+
+ u32 group;
+ uword *port_number_bitmap;
+ u8 use_custom_mac;
+ u8 hw_address[6];
+} bond_if_t;
+
+/*
+ * State kept for one slave (member) interface of a bond.  Instances live
+ * in the bond_main_t.neighbors pool; the existing comments refer to a
+ * slave as a "neighbor".
+ */
+typedef struct
+{
+  u8 persistent_hw_address[6];
+
+  /* neighbor's vlib software interface index */
+  u32 sw_if_index;
+
+  /* Neighbor time-to-live (usually 3s) */
+  f32 ttl_in_seconds;
+
+  /*
+   * 1 = interface is configured with long timeout (60s)
+   * NOTE(review): LACP_LONG_TIMOUT_TIME evaluates to 90.0; confirm which
+   * figure is intended.
+   */
+  u8 is_long_timeout;
+
+  /* 1 = debug is on; 0 = debug is off */
+  u8 debug;
+
+  /* tx packet template id for this neighbor */
+  u8 packet_template_index;
+
+  /* Info we actually keep about each neighbor */
+
+  /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
+  u8 last_packet_signature_valid;
+  uword last_packet_signature;
+
+  /* last received lacp packet, for the J-hash optimization */
+  u8 *last_rx_pkt;
+
+  /* last marker packet */
+  u8 *last_marker_pkt;
+
+  /* neighbor vlib hw_if_index */
+  u32 hw_if_index;
+
+  /* actor does not initiate the protocol exchange */
+  u8 is_passive;
+
+  /* Partner port information */
+  lacp_port_info_t partner;
+  lacp_port_info_t partner_admin;
+
+  /* Actor port information */
+  lacp_port_info_t actor;
+  lacp_port_info_t actor_admin;
+
+  /* Need To Transmit flag */
+  u8 ntt;
+
+  /* Link has been established and Aggregate Port is operable */
+  u8 port_enabled;
+
+  /* Initialization or reinitialization of the lacp protocol entity */
+  u8 begin;
+
+  /* Aggregation Port is operating the lacp */
+  u8 lacp_enabled;
+
+  /* MUX to indicate to the Selection Logic wait_while_timer expired */
+  u8 ready_n;
+
+  /* Selection Logic indicates all Aggregation Ports attached */
+  u8 ready;
+
+  /* Selection Logic selected an Aggregator */
+  int selected;
+
+  /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
+  u8 port_moved;
+
+  /* timer used to detect whether received protocol information has expired */
+  f64 current_while_timer;
+
+  /* timer used to detect actor churn states */
+  f64 actor_churn_timer;
+
+  /* time last lacpdu was sent */
+  f64 last_lacpdu_time;
+
+  /* timer used to generate periodic transmission */
+  f64 periodic_timer;
+
+  /* timer used to detect partner churn states */
+  f64 partner_churn_timer;
+
+  /* provides hysteresis before performing an aggregation change */
+  f64 wait_while_timer;
+
+  /* Implementation variables, not in the spec */
+  int rx_state;
+  int tx_state;
+  int mux_state;
+  int ptx_state;
+
+  /* actor admin key */
+  u32 group;
+
+  u32 marker_tx_id;
+
+  u32 bif_dev_instance;
+
+  u8 loopback_port;
+
+  /* bond mode */
+  u8 mode;
+
+  clib_spinlock_t lockp;
+} slave_if_t;
+
+/*
+ * Signature of the LACP enable/disable callback stored in
+ * bond_main_t.lacp_enable_disable by bond_register_callback ().
+ */
+typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
+ slave_if_t * sif, u8 enable);
+
+/*
+ * Global state of the bonding driver: the interface and slave pools, the
+ * sw_if_index lookup tables, and the LACP callback hook.  This header
+ * declares one instance, bond_main.
+ */
+typedef struct
+{
+ /* pool of bonding interfaces */
+ bond_if_t *interfaces;
+
+ /* pool of lacp neighbors */
+ slave_if_t *neighbors;
+
+ /* rapidly find a neighbor by vlib software interface index */
+ uword *neighbor_by_sw_if_index;
+
+ /* rapidly find a bond by vlib software interface index */
+ uword *bond_by_sw_if_index;
+
+ /* convenience variables */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* lacp plugin is loaded */
+ u8 lacp_plugin_loaded;
+
+ /* callback installed by bond_register_callback () */
+ lacp_enable_disable_func lacp_enable_disable;
+} bond_main_t;
+
+/*
+ * bond packet trace capture
+ *
+ * Holds a copy of the Ethernet header plus two interface indices
+ * (presumably the slave's and the owning bond's -- TODO confirm against
+ * the node that fills this in).
+ */
+typedef struct
+{
+ ethernet_header_t ethernet;
+ u32 sw_if_index;
+ u32 bond_sw_if_index;
+} bond_packet_trace_t;
+
+/*
+ * Signature of a load-balance function: takes the bond and one buffer and
+ * returns a u32 (presumably the chosen slave index -- TODO confirm).
+ */
+typedef u32 (*load_balance_func) (vlib_main_t * vm,
+ vlib_node_runtime_t * node, bond_if_t * bif,
+ vlib_buffer_t * b0);
+
+/* Wrapper holding a single load_balance_func pointer. */
+typedef struct
+{
+ load_balance_func load_balance;
+} bond_load_balance_func_t;
+
+/* Objects declared here and defined outside this header. */
+extern vlib_node_registration_t bond_input_node;
+extern vnet_device_class_t bond_dev_class;
+extern bond_main_t bond_main;
+
+/* Switch collecting/distributing off or on for one slave. */
+void bond_disable_collecting_distributing (vlib_main_t * vm,
+ slave_if_t * sif);
+void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
+/* format () helper; the expected va_list contents are not visible here. */
+u8 *format_bond_interface_name (u8 * s, va_list * args);
+
+/*
+ * Bond management entry points.  The *_args_t variants report their
+ * outcome through the rv / error fields of the argument block.
+ */
+void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
+int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
+void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);
+void bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args);
+/* Dump helpers; results are returned through the out_* vector pointers. */
+int bond_dump_ifs (bond_interface_details_t ** out_bondids);
+int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveids,
+ u32 bond_sw_if_index);
+
+/*
+ * unformat () helper that parses a bond mode keyword.
+ *
+ * Expects one u8 * in args.  The keywords of foreach_bond_mode are tried
+ * in table order; on the first match the matching BOND_MODE_* value is
+ * stored through the pointer and 1 is returned.  Returns 0, leaving the
+ * destination untouched, when no keyword matches.
+ */
+static inline uword
+unformat_bond_mode (unformat_input_t * input, va_list * args)
+{
+  u8 *mode = va_arg (*args, u8 *);
+
+#define _(v, f, s) \
+  if (unformat (input, s)) \
+    { \
+      *mode = BOND_MODE_##f; \
+      return 1; \
+    }
+  foreach_bond_mode
+#undef _
+
+  return 0;
+}
+
+/*
+ * format () helper that appends the name of a bond mode to s.
+ *
+ * Expects one u32 in args.  Values listed in foreach_bond_mode produce
+ * their table string; any other value produces "unknown".
+ */
+static inline u8 *
+format_bond_mode (u8 * s, va_list * args)
+{
+  u32 mode = va_arg (*args, u32);
+
+  switch (mode)
+    {
+      /* The macro parameter is called "str" so it cannot shadow "s". */
+#define _(v, f, str) case BOND_MODE_##f: return format (s, "%s", (u8 *) str);
+      foreach_bond_mode
+#undef _
+    default:
+      break;
+    }
+  return format (s, "unknown");
+}
+
+/*
+ * unformat () helper that parses a load-balance keyword.
+ *
+ * Expects one u8 * in args.  The keywords of foreach_bond_lb are tried in
+ * table order; on the first match the matching BOND_LB_* value is stored
+ * through the pointer and 1 is returned.  Returns 0, leaving the
+ * destination untouched, when no keyword matches.
+ */
+static inline uword
+unformat_bond_load_balance (unformat_input_t * input, va_list * args)
+{
+  u8 *lb = va_arg (*args, u8 *);
+
+#define _(v, f, s, p) \
+  if (unformat (input, s)) \
+    { \
+      *lb = BOND_LB_##f; \
+      return 1; \
+    }
+  foreach_bond_lb
+#undef _
+
+  return 0;
+}
+
+/*
+ * format () helper that appends the name of a load-balance algorithm to s.
+ *
+ * Expects one u32 in args.  Values listed in foreach_bond_lb_algo produce
+ * their table string; any other value produces "unknown".
+ */
+static inline u8 *
+format_bond_load_balance (u8 * s, va_list * args)
+{
+  u32 lb = va_arg (*args, u32);
+
+  switch (lb)
+    {
+      /* The macro parameter is called "str" so it cannot shadow "s". */
+#define _(v, f, str, p) case BOND_LB_##f: return format (s, "%s", (u8 *) str);
+      foreach_bond_lb_algo
+#undef _
+    default:
+      break;
+    }
+  return format (s, "unknown");
+}
+
+/*
+ * Record that the LACP plugin is loaded and install its enable/disable
+ * callback in bond_main.
+ */
+static inline void
+bond_register_callback (lacp_enable_disable_func func)
+{
+  bond_main.lacp_plugin_loaded = 1;
+  bond_main.lacp_enable_disable = func;
+}
+
+/*
+ * Look up a bond interface by its software interface index.
+ *
+ * Returns the bond_if_t from the bond_main.interfaces pool, or 0 when
+ * sw_if_index is not present in bond_by_sw_if_index.
+ */
+static inline bond_if_t *
+bond_get_master_by_sw_if_index (u32 sw_if_index)
+{
+  uword *slot = hash_get (bond_main.bond_by_sw_if_index, sw_if_index);
+
+  if (slot == 0)
+    return 0;
+
+  return pool_elt_at_index (bond_main.interfaces, slot[0]);
+}
+
+/*
+ * Return the bond interface stored at pool index dev_instance in
+ * bond_main.interfaces.  No lookup table is involved; the caller supplies
+ * the pool index directly.
+ */
+static inline bond_if_t *
+bond_get_master_by_dev_instance (u32 dev_instance)
+{
+  return pool_elt_at_index (bond_main.interfaces, dev_instance);
+}
+
+/*
+ * Look up a slave interface by its software interface index.
+ *
+ * Returns the slave_if_t from the bond_main.neighbors pool, or 0 when
+ * sw_if_index is not present in neighbor_by_sw_if_index.
+ */
+static inline slave_if_t *
+bond_get_slave_by_sw_if_index (u32 sw_if_index)
+{
+  uword *slot = hash_get (bond_main.neighbor_by_sw_if_index, sw_if_index);
+
+  if (slot == 0)
+    return 0;
+
+  return pool_elt_at_index (bond_main.neighbors, slot[0]);
+}
+
+#endif /* __included_vnet_bonding_node_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h
index fea92e7fd27..5c7c4869c91 100644
--- a/src/vnet/vnet_all_api_h.h
+++ b/src/vnet/vnet_all_api_h.h
@@ -29,6 +29,7 @@
#include <vlibmemory/vl_memory_api_h.h>
#endif /* included_from_layer_3 */
+#include <vnet/bonding/bond.api.h>
#include <vnet/devices/af_packet/af_packet.api.h>
#include <vnet/devices/netmap/netmap.api.h>
#include <vnet/devices/virtio/vhost_user.api.h>