/* *------------------------------------------------------------------ * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *------------------------------------------------------------------ */ #include #include #include #include #include #include void bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) { bond_main_t *bm = &bond_main; bond_if_t *bif; int i; uword p; u8 switching_active = 0; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); vec_foreach_index (i, bif->active_slaves) { p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) { if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && (i == 0) && (vec_len (bif->active_slaves) > 1)) /* deleting the active slave for active-backup */ switching_active = 1; vec_del1 (bif->active_slaves, i); if (sif->lacp_enabled && bif->numa_only) { /* For lacp mode, if we check it is a slave on local numa node, bif->n_numa_slaves should be decreased by 1 becasue the first bif->n_numa_slaves are all slaves on local numa node */ if (i < bif->n_numa_slaves) { bif->n_numa_slaves--; ASSERT (bif->n_numa_slaves >= 0); } } break; } } /* We get a new slave just becoming active */ if (switching_active) vlib_process_signal_event (bm->vlib_main, bond_process_node.index, BOND_SEND_GARP_NA, bif->hw_if_index); clib_spinlock_unlock_if_init (&bif->lockp); } /* * return 1 if s2 is preferred. * return -1 if s1 is preferred. 
*/ static int bond_slave_sort (void *a1, void *a2) { u32 *s1 = a1; u32 *s2 = a2; slave_if_t *sif1 = bond_get_slave_by_sw_if_index (*s1); slave_if_t *sif2 = bond_get_slave_by_sw_if_index (*s2); bond_if_t *bif; ASSERT (sif1); ASSERT (sif2); /* * sort entries according to preference rules: * 1. biggest weight * 2. numa-node * 3. current active slave (to prevent churning) * 4. lowest sw_if_index (for deterministic behavior) * */ if (sif2->weight > sif1->weight) return 1; if (sif2->weight < sif1->weight) return -1; else { if (sif2->is_local_numa > sif1->is_local_numa) return 1; if (sif2->is_local_numa < sif1->is_local_numa) return -1; else { bif = bond_get_master_by_dev_instance (sif1->bif_dev_instance); /* Favor the current active slave to avoid churning */ if (bif->active_slaves[0] == sif2->sw_if_index) return 1; if (bif->active_slaves[0] == sif1->sw_if_index) return -1; /* go for the tiebreaker as the last resort */ if (sif1->sw_if_index > sif2->sw_if_index) return 1; if (sif1->sw_if_index < sif2->sw_if_index) return -1; else ASSERT (0); } } return 0; } static void bond_sort_slaves (bond_if_t * bif) { bond_main_t *bm = &bond_main; u32 old_active = bif->active_slaves[0]; vec_sort_with_function (bif->active_slaves, bond_slave_sort); if (old_active != bif->active_slaves[0]) vlib_process_signal_event (bm->vlib_main, bond_process_node.index, BOND_SEND_GARP_NA, bif->hw_if_index); } void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) { bond_if_t *bif; bond_main_t *bm = &bond_main; vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); int i; uword p; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); vec_foreach_index (i, bif->active_slaves) { p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) goto done; } if (sif->lacp_enabled && bif->numa_only && (vm->numa_node == hw->numa_node)) { vec_insert_elts 
(bif->active_slaves, &sif->sw_if_index, 1, bif->n_numa_slaves); bif->n_numa_slaves++; } else vec_add1 (bif->active_slaves, sif->sw_if_index); sif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; if (bif->mode == BOND_MODE_ACTIVE_BACKUP) { if (vec_len (bif->active_slaves) == 1) /* First slave becomes active? */ vlib_process_signal_event (bm->vlib_main, bond_process_node.index, BOND_SEND_GARP_NA, bif->hw_if_index); else bond_sort_slaves (bif); } done: clib_spinlock_unlock_if_init (&bif->lockp); } int bond_dump_ifs (bond_interface_details_t ** out_bondifs) { vnet_main_t *vnm = vnet_get_main (); bond_main_t *bm = &bond_main; bond_if_t *bif; vnet_hw_interface_t *hi; bond_interface_details_t *r_bondifs = NULL; bond_interface_details_t *bondif = NULL; /* *INDENT-OFF* */ pool_foreach (bif, bm->interfaces, vec_add2(r_bondifs, bondif, 1); clib_memset (bondif, 0, sizeof (*bondif)); bondif->id = bif->id; bondif->sw_if_index = bif->sw_if_index; hi = vnet_get_hw_interface (vnm, bif->hw_if_index); clib_memcpy(bondif->interface_name, hi->name, MIN (ARRAY_LEN (bondif->interface_name) - 1, vec_len ((const char *) hi->name))); /* enforce by memset() above */ ASSERT(0 == bondif->interface_name[ARRAY_LEN (bondif->interface_name) - 1]); bondif->mode = bif->mode; bondif->lb = bif->lb; bondif->numa_only = bif->numa_only; bondif->active_slaves = vec_len (bif->active_slaves); bondif->slaves = vec_len (bif->slaves); ); /* *INDENT-ON* */ *out_bondifs = r_bondifs; return 0; } int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, u32 bond_sw_if_index) { vnet_main_t *vnm = vnet_get_main (); bond_if_t *bif; vnet_hw_interface_t *hi; vnet_sw_interface_t *sw; slave_interface_details_t *r_slaveifs = NULL; slave_interface_details_t *slaveif = NULL; u32 *sw_if_index = NULL; slave_if_t *sif; bif = bond_get_master_by_sw_if_index (bond_sw_if_index); if (!bif) return 1; vec_foreach (sw_if_index, bif->slaves) { vec_add2 (r_slaveifs, slaveif, 1); clib_memset (slaveif, 0, sizeof 
(*slaveif)); sif = bond_get_slave_by_sw_if_index (*sw_if_index); if (sif) { sw = vnet_get_sw_interface (vnm, sif->sw_if_index); hi = vnet_get_hw_interface (vnm, sw->hw_if_index); clib_memcpy (slaveif->interface_name, hi->name, MIN (ARRAY_LEN (slaveif->interface_name) - 1, vec_len ((const char *) hi->name))); /* enforce by memset() above */ ASSERT (0 == slaveif->interface_name[ARRAY_LEN (slaveif->interface_name) - 1]); slaveif->sw_if_index = sif->sw_if_index; slaveif->is_passive = sif->is_passive; slaveif->is_long_timeout = sif->is_long_timeout; slaveif->is_local_numa = sif->is_local_numa; slaveif->weight = sif->weight; } } *out_slaveifs = r_slaveifs; return 0; } /* * Manage secondary mac addresses when attaching/detaching a slave. * If adding, copies any secondary addresses from master to slave * If deleting, deletes the master's secondary addresses from the slave * */ static void bond_slave_add_del_mac_addrs (bond_if_t * bif, u32 sif_sw_if_index, u8 is_add) { vnet_main_t *vnm = vnet_get_main (); ethernet_interface_t *b_ei; mac_address_t *sec_mac; vnet_hw_interface_t *s_hwif; b_ei = ethernet_get_interface (ðernet_main, bif->hw_if_index); if (!b_ei || !b_ei->secondary_addrs) return; s_hwif = vnet_get_sup_hw_interface (vnm, sif_sw_if_index); vec_foreach (sec_mac, b_ei->secondary_addrs) vnet_hw_interface_add_del_mac_address (vnm, s_hwif->hw_if_index, sec_mac->bytes, is_add); } static void bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) { bond_main_t *bm = &bond_main; vnet_main_t *vnm = vnet_get_main (); int i; vnet_hw_interface_t *sif_hw; sif_hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); bif->port_number_bitmap = clib_bitmap_set (bif->port_number_bitmap, ntohs (sif->actor_admin.port_number) - 1, 0); bm->slave_by_sw_if_index[sif->sw_if_index] = 0; vec_free (sif->last_marker_pkt); vec_free (sif->last_rx_pkt); vec_foreach_index (i, bif->slaves) { uword p = *vec_elt_at_index (bif->slaves, i); if (p == sif->sw_if_index) { vec_del1 
(bif->slaves, i); break; } } bond_disable_collecting_distributing (vm, sif); vnet_feature_enable_disable ("device-input", "bond-input", sif->sw_if_index, 0, 0, 0); /* Put back the old mac */ vnet_hw_interface_change_mac_address (vnm, sif_hw->hw_if_index, sif->persistent_hw_address); /* delete the bond's secondary/virtual mac addrs from the slave */ bond_slave_add_del_mac_addrs (bif, sif->sw_if_index, 0 /* is_add */ ); if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable) (*bm->lacp_enable_disable) (vm, bif, sif, 0); if (bif->mode == BOND_MODE_LACP) { stat_segment_deregister_state_counter (bm->stats[bif->sw_if_index][sif->sw_if_index].actor_state); stat_segment_deregister_state_counter (bm->stats[bif->sw_if_index][sif->sw_if_index].partner_state); } pool_put (bm->neighbors, sif); } int bond_delete_if (vlib_main_t * vm, u32 sw_if_index) { bond_main_t *bm = &bond_main; vnet_main_t *vnm = vnet_get_main (); bond_if_t *bif; slave_if_t *sif; vnet_hw_interface_t *hw; u32 *sif_sw_if_index; u32 *s_list = 0; hw = vnet_get_sup_hw_interface (vnm, sw_if_index); if (hw == NULL || bond_dev_class.index != hw->dev_class_index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; bif = bond_get_master_by_dev_instance (hw->dev_instance); vec_append (s_list, bif->slaves); vec_foreach (sif_sw_if_index, s_list) { sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index); if (sif) bond_delete_neighbor (vm, bif, sif); } vec_free (s_list); /* bring down the interface */ vnet_hw_interface_set_flags (vnm, bif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, bif->sw_if_index, 0); ethernet_delete_interface (vnm, bif->hw_if_index); clib_bitmap_free (bif->port_number_bitmap); hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index); hash_unset (bm->id_used, bif->id); clib_memset (bif, 0, sizeof (*bif)); pool_put (bm->interfaces, bif); return 0; }
/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#ifndef _MEMIF_H_
#define _MEMIF_H_

#include <stdint.h>

/* Cache-line size used for alignment; may be overridden at compile time. */
#ifndef MEMIF_CACHELINE_SIZE
#define MEMIF_CACHELINE_SIZE 64
#endif

/* Magic constant -- presumably stored in shared-memory structures as a
   sanity check; the ring definition is not visible in this chunk, TODO
   confirm where it is written/validated. */
#define MEMIF_COOKIE		0x3E31F20
/* Protocol version, packed as (major << 8) | minor -- currently 2.0. */
#define MEMIF_VERSION_MAJOR	2
#define MEMIF_VERSION_MINOR	0
#define MEMIF_VERSION		((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)

/*
 *  Type definitions
 */

/* Control-channel message types; stored in the 16-bit memif_msg_t.type
   field.  Each type from INIT onward selects the matching payload member
   of the memif_msg_t union below; NONE and ACK carry no payload. */
typedef enum memif_msg_type
{
  MEMIF_MSG_TYPE_NONE = 0,	/* no message / empty slot */
  MEMIF_MSG_TYPE_ACK = 1,	/* acknowledgement, no payload */
  MEMIF_MSG_TYPE_HELLO = 2,	/* memif_msg_hello_t */
  MEMIF_MSG_TYPE_INIT = 3,	/* memif_msg_init_t */
  MEMIF_MSG_TYPE_ADD_REGION = 4,	/* memif_msg_add_region_t */
  MEMIF_MSG_TYPE_ADD_RING = 5,	/* memif_msg_add_ring_t */
  MEMIF_MSG_TYPE_CONNECT = 6,	/* memif_msg_connect_t */
  MEMIF_MSG_TYPE_CONNECTED = 7,	/* memif_msg_connected_t */
  MEMIF_MSG_TYPE_DISCONNECT = 8,	/* memif_msg_disconnect_t */
} memif_msg_type_t;

/* Ring direction -- presumably S2M = slave-to-master, M2S = master-to-slave
   (consistent with max_s2m_ring/max_m2s_ring in the hello message). */
typedef enum
{
  MEMIF_RING_S2M = 0,
  MEMIF_RING_M2S = 1
} memif_ring_type_t;

/* Operating mode of a memif interface; sent in memif_msg_init_t.mode
   (as an 8-bit field). */
typedef enum
{
  MEMIF_INTERFACE_MODE_ETHERNET = 0,
  MEMIF_INTERFACE_MODE_IP = 1,
  MEMIF_INTERFACE_MODE_PUNT_INJECT = 2,
} memif_interface_mode_t;

/* Fixed-width scalar types used in the wire-format messages below. */
typedef uint16_t memif_region_index_t;	/* identifies a shared-memory region */
typedef uint32_t memif_region_offset_t;	/* byte offset within a region */
typedef uint64_t memif_region_size_t;	/* region size in bytes */
typedef uint16_t memif_ring_index_t;	/* identifies a ring */
typedef uint32_t memif_interface_id_t;	/* identifies a memif interface */
typedef uint16_t memif_version_t;	/* packed version, see MEMIF_VERSION */
typedef uint8_t memif_log2_ring_size_t;	/* ring size expressed as log2 */

/*
 *  Socket messages
 */

/* HELLO payload: the sender announces its name, the protocol version range
   it accepts, and upper bounds on regions, rings and ring size. */
typedef struct __attribute__ ((packed))
{
  uint8_t name[32];		/* sender's name */
  memif_version_t min_version;	/* lowest accepted protocol version */
  memif_version_t max_version;	/* highest accepted protocol version */
  memif_region_index_t max_region;	/* highest accepted region index */
  memif_ring_index_t max_m2s_ring;	/* highest accepted M2S ring index */
  memif_ring_index_t max_s2m_ring;	/* highest accepted S2M ring index */
  memif_log2_ring_size_t max_log2_ring_size;	/* max log2 ring size */
} memif_msg_hello_t;

/* INIT payload: connection request carrying the chosen protocol version,
   the target interface id, the operating mode and an authentication
   secret (semantics of the secret are not visible in this header). */
typedef struct __attribute__ ((packed))
{
  memif_version_t version;	/* protocol version the sender will use */
  memif_interface_id_t id;	/* id of the interface to connect to */
  memif_interface_mode_t mode:8;	/* ethernet / ip / punt-inject */
  uint8_t secret[24];		/* authentication secret */
  uint8_t name[32];		/* sender's name */
} memif_msg_init_t;

/* ADD_REGION payload: announces a shared-memory region by index and size.
   No file descriptor field -- presumably the region fd travels as socket
   ancillary data; confirm against the control-channel implementation. */
typedef struct __attribute__ ((packed))
{
  memif_region_index_t index;	/* region being announced */
  memif_region_size_t size;	/* region size in bytes */
} memif_msg_add_region_t;

/* ADD_RING payload: places ring `index` at `offset` inside region `region`
   with 2^log2_ring_size slots; the S2M flag gives the ring's direction. */
typedef struct __attribute__ ((packed))
{
  uint16_t flags;		/* combination of the flag bits below */
#define MEMIF_MSG_ADD_RING_FLAG_S2M	(1 << 0)
  memif_ring_index_t index;	/* ring being announced */
  memif_region_index_t region;	/* region containing the ring */
  memif_region_offset_t offset;	/* ring's byte offset within the region */
  memif_log2_ring_size_t log2_ring_size;	/* log2 of slot count */
  uint16_t private_hdr_size;	/* used for private metadata */
} memif_msg_add_ring_t;

/* CONNECT payload: carries only the sender's interface name. */
typedef struct __attribute__ ((packed))
{
  uint8_t if_name[32];		/* sender's interface name */
} memif_msg_connect_t;

/* CONNECTED payload: reply to CONNECT, same single-field layout. */
typedef struct __attribute__ ((packed))
{
  uint8_t if_name[32];		/* sender's interface name */
} memif_msg_connected_t;

/* DISCONNECT payload: a numeric code plus a human-readable reason. */
typedef struct __attribute__ ((packed))
{
  uint32_t code;		/* disconnect reason code */
  uint8_t string[96];		/* textual description of the reason */
} memif_msg_disconnect_t;

/* A complete control-channel message: a 16-bit type discriminator
   (memif_msg_type_t) followed by the payload selected by that type.
   Packed and aligned to 128 bytes so every message is exactly 128 bytes,
   as enforced by the static assertion below. */
typedef struct __attribute__ ((packed, aligned (128)))
{
  memif_msg_type_t type:16;	/* selects the active union member */
  union
  {
    memif_msg_hello_t hello;
    memif_msg_init_t init;
    memif_msg_add_region_t add_region;
    memif_msg_add_ring_t add_ring;
    memif_msg_connect_t connect;
    memif_msg_connected_t connected;
    memif_msg_disconnect_t disconnect;
  };
} memif_msg_t;

_Static_assert (sizeof (memif_msg_t) == 128,
		"Size of memif_msg_t must be 128");

/*
 *  Ring and Descriptor Layout
 */

typedef