/* * l2_bd.c : layer 2 bridge domain * * Copyright (c) 2013 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /** * @file * @brief Ethernet Bridge Domain. * * Code in this file manages Layer 2 bridge domains. * */ bd_main_t bd_main; /** Init bridge domain if not done already. For feature bitmap, set all bits except ARP termination */ void bd_validate (l2_bridge_domain_t * bd_config) { if (bd_is_valid (bd_config)) return; bd_config->feature_bitmap = ~(L2INPUT_FEAT_ARP_TERM | L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_ARP_UFWD); bd_config->bvi_sw_if_index = ~0; bd_config->uu_fwd_sw_if_index = ~0; bd_config->members = 0; bd_config->flood_count = 0; bd_config->tun_master_count = 0; bd_config->tun_normal_count = 0; bd_config->no_flood_count = 0; bd_config->mac_by_ip4 = 0; bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); } u32 bd_find_index (bd_main_t * bdm, u32 bd_id) { u32 *p = (u32 *) hash_get (bdm->bd_index_by_bd_id, bd_id); if (!p) return ~0; return p[0]; } u32 bd_add_bd_index (bd_main_t * bdm, u32 bd_id) { ASSERT (!hash_get (bdm->bd_index_by_bd_id, bd_id)); u32 rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); /* mark this index taken */ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1); hash_set (bdm->bd_index_by_bd_id, bd_id, rv); vec_validate 
(l2input_main.bd_configs, rv); l2input_main.bd_configs[rv].bd_id = bd_id; l2input_main.bd_configs[rv].learn_limit = l2learn_main.bd_default_learn_limit; l2input_main.bd_configs[rv].learn_count = 0; return rv; } static inline void bd_free_ip_mac_tables (l2_bridge_domain_t * bd) { u64 mac_addr; ip6_address_t *ip6_addr_key; hash_free (bd->mac_by_ip4); /* *INDENT-OFF* */ hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6, ({ clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */ })); /* *INDENT-ON* */ hash_free (bd->mac_by_ip6); } static int bd_delete (bd_main_t * bdm, u32 bd_index) { l2_bridge_domain_t *bd = &l2input_main.bd_configs[bd_index]; u32 bd_id = bd->bd_id; /* flush non-static MACs in BD and removed bd_id from hash table */ l2fib_flush_bd_mac (vlib_get_main (), bd_index); hash_unset (bdm->bd_index_by_bd_id, bd_id); /* mark this index clear */ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0); /* clear BD config for reuse: bd_id to -1 and clear feature_bitmap */ bd->bd_id = ~0; bd->feature_bitmap = 0; bd->learn_limit = 0; bd->learn_count = ~0; /* free BD tag */ vec_free (bd->bd_tag); /* free memory used by BD */ vec_free (bd->members); bd_free_ip_mac_tables (bd); return 0; } static void update_flood_count (l2_bridge_domain_t * bd_config) { bd_config->flood_count = (vec_len (bd_config->members) - (bd_config->tun_master_count ? bd_config->tun_normal_count : 0)); bd_config->flood_count -= bd_config->no_flood_count; } void bd_add_member (l2_bridge_domain_t * bd_config, l2_flood_member_t * member) { u32 ix = 0; vnet_sw_interface_t *sw_if = vnet_get_sw_interface (vnet_get_main (), member->sw_if_index); /* * Add one element to the vector * vector is ordered [ bvi, normal/tun_masters..., tun_normals... no_flood] * When flooding, the bvi interface (if present) must be the last member * processed due to how BVI processing can change the packet. 
To enable * this order, we make the bvi interface the first in the vector and * flooding walks the vector in reverse. The flood-count determines where * in the member list to start the walk from. */ switch (sw_if->flood_class) { case VNET_FLOOD_CLASS_NO_FLOOD: bd_config->no_flood_count++; ix = vec_len (bd_config->members); break; case VNET_FLOOD_CLASS_BVI: ix = 0; break; case VNET_FLOOD_CLASS_TUNNEL_MASTER: bd_config->tun_master_count++; /* Fall through */ case VNET_FLOOD_CLASS_NORMAL: ix = (vec_len (bd_config->members) - bd_config->tun_normal_count - bd_config->no_flood_count); break; case VNET_FLOOD_CLASS_TUNNEL_NORMAL: ix = (vec_len (bd_config->members) - bd_config->no_flood_count); bd_config->tun_normal_count++; break; } vec_insert_elts (bd_config->members, member, 1, ix); update_flood_count (bd_config); } #define BD_REMOVE_ERROR_OK 0 #define BD_REMOVE_ERROR_NOT_FOUND 1 u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index) { u32 ix; /* Find and delete the member */ vec_foreach_index (ix, bd_config->members) { l2_flood_member_t *m = vec_elt_at_index (bd_config->members, ix); if (m->sw_if_index == sw_if_index) { vnet_sw_interface_t *sw_if = vnet_get_sw_interface (vnet_get_main (), sw_if_index); if (sw_if->flood_class != VNET_FLOOD_CLASS_NORMAL) { if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_MASTER) bd_config->tun_master_count--; else if (sw_if->flood_class == VNET_FLOOD_CLASS_TUNNEL_NORMAL) bd_config->tun_normal_count--; else if (sw_if->flood_class == VNET_FLOOD_CLASS_NO_FLOOD) bd_config->no_flood_count--; } vec_delete (bd_config->members, 1, ix); update_flood_count (bd_config); return BD_REMOVE_ERROR_OK; } } return BD_REMOVE_ERROR_NOT_FOUND; } clib_error_t * l2bd_init (vlib_main_t * vm) { bd_main_t *bdm = &bd_main; bdm->bd_index_by_bd_id = hash_create (0, sizeof (uword)); /* * create a placeholder bd with bd_id of 0 and bd_index of 0 with feature set * to packet drop only. 
Thus, packets received from any L2 interface with * uninitialized bd_index of 0 can be dropped safely. */ u32 bd_index = bd_add_bd_index (bdm, 0); ASSERT (bd_index == 0); l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP; bdm->vlib_main = vm; return 0; } VLIB_INIT_FUNCTION (l2bd_init); l2_bridge_domain_t * bd_get (u32 bd_index) { if (bd_index < vec_len (l2input_main.bd_configs)) return (vec_elt_at_index (l2input_main.bd_configs, bd_index)); return (NULL); } u32 bd_input_walk (u32 bd_index, bd_input_walk_fn_t fn, void *data) { l2_flood_member_t *member; l2_bridge_domain_t *bd; u32 sw_if_index; sw_if_index = ~0; bd = bd_get (bd_index); ASSERT (bd); vec_foreach (member, bd->members) { if (WALK_STOP == fn (bd_index, member->sw_if_index)) { sw_if_index = member->sw_if_index; break; } } return (sw_if_index); } static void b2_input_recache (u32 bd_index) { bd_input_walk (bd_index, l2input_recache, NULL); } /** Set the learn/forward/flood flags for the bridge domain. Return 0 if ok, non-zero if for an error. */ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags, u32 enable) { l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); bd_validate (bd_config); u32 feature_bitmap = 0; if (flags & L2_LEARN) { feature_bitmap |= L2INPUT_FEAT_LEARN; } if (flags & L2_FWD) { feature_bitmap |= L2INPUT_FEAT_FWD; } if (flags & L2_FLOOD) { feature_bitmap |= L2INPUT_FEAT_FLOOD; } if (flags & L2_UU_FLOOD) { feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; } if (flags & L2_ARP_TERM) { feature_bitmap |= L2INPUT_FEAT_ARP_TERM; } if (flags & L2_ARP_UFWD) { feature_bitmap |= L2INPUT_FEAT_ARP_UFWD; } if (enable) { bd_config->feature_bitmap |= feature_bitmap; } else { bd_config->feature_bitmap &= ~feature_bitmap; } b2_input_recache (bd_index); return bd_config->feature_bitmap; } /** Set the mac age for the bridge domain. 
*/ void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age) { l2_bridge_domain_t *bd_config; int enable = 0; vec_validate (l2input_main.bd_configs, bd_index); bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); bd_config->mac_age = age; b2_input_recache (bd_index); /* check if there is at least one bd with mac aging enabled */ vec_foreach (bd_config, l2input_main.bd_configs) enable |= bd_config->bd_id != ~0 && bd_config->mac_age != 0; vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index, enable ? L2_MAC_AGE_PROCESS_EVENT_START : L2_MAC_AGE_PROCESS_EVENT_STOP, 0); } /** Set learn limit for the bridge domain. */ void bd_set_learn_limit (vlib_main_t *vm, u32 bd_index, u32 learn_limit) { l2_bridge_domain_t *bd_config; vec_validate (l2input_main.bd_configs, bd_index); bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); bd_config->learn_limit = learn_limit; } /** Set the tag for the bridge domain. */ static void bd_set_bd_tag (vlib_main_t * vm, u32 bd_index, u8 * bd_tag) { u8 *old; l2_bridge_domain_t *bd_config; vec_validate (l2input_main.bd_configs, bd_index); bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); old = bd_config->bd_tag; if (bd_tag[0]) { bd_config->bd_tag = format (0, "%s%c", bd_tag, 0); } else { bd_config->bd_tag = NULL; } vec_free (old); } /** Set bridge-domain learn enable/disable. 
The CLI format is: set bridge-domain learn [disable] */ static clib_error_t * bd_learn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { bd_main_t *bdm = &bd_main; clib_error_t *error = 0; u32 bd_index, bd_id; u32 enable; uword *p; if (!unformat (input, "%d", &bd_id)) { error = clib_error_return (0, "expecting bridge-domain id but got `%U'", format_unformat_error, input); goto done; } if (bd_id == 0) return clib_error_return (0, "No operations on the default bridge domain are supported"); p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) return clib_error_return (0, "No such bridge domain %d", bd_id); bd_index = p[0]; enable = 1; if (unformat (input, "disable")) { enable = 0; } /* set the bridge domain flag */ bd_set_flags (vm, bd_index, L2_LEARN, enable); done: return error; } /*? * Layer 2 learning can be enabled and disabled on each * interface and on each bridge-domain. Use this command to * manage bridge-domains. It is enabled by default. * * @cliexpar * Example of how to enable learning (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain learn 200} * Example of how to disable learning (where 200 is the bridge-domain-id): * @cliexcmd{set bridge-domain learn 200 disable} ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (bd_learn_cli, static) = { .path = "set bridge-domain learn", .short_help = "set bridge-domain learn [disable]", .function = bd_learn, }; /* *INDENT-ON* */ static clib_error_t * bd_default_learn_limit (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { l2learn_main_t *l2m = &l2learn_main; clib_error_t *error = 0; u32 learn_limit; if (!unformat (input, "%d", &learn_limit)) { error = clib_error_return ( 0, "expecting per bridge-domain max entry number got`%U'", format_unformat_error, input); goto done; } l2m->bd_default_learn_limit = learn_limit; done: return error; } VLIB_CLI_COMMAND (bd_default_learn_limit_cli, static) = { .path = "set bridge-domain default-learn-limit", .short_help = "set 
bridge-domain default-learn-limit ", .function = bd_default_learn_limit, }; /** Set bridge-domain forward enable/disable. The CLI format is: set bridge-domain forward [disable] */ static clib_error_t * bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { bd_main_t *bdm = &bd_main; clib_error_t *error = 0; u32 bd_index, bd_id; u32 enable; uword *p; if (!unformat (input, "%d", &bd_id)) { error = clib_error_return (0, "expecting bridge-domain id but got `%U'", format_unformat_error, input); goto done; } if (bd_id == 0) return clib_err
/*
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <sys/fcntl.h>
#include <vppinfra/mpcap.h>

/*
 * Unfortunately, the "make test" infra won't work with mapped pcap files.
 * Given enough work [mostly in .py code], one could fix that.
 */

/**
 * @file
 * @brief mapped pcap file support
 *
 * Usage:
 *
 * <code><pre>
 * \#include <vppinfra/mpcap.h>
 *
 * static mpcap_main_t mpcap = {
 *  .file_name = "/tmp/ip4",
 *  .n_packets_to_capture = 2,
 *  .packet_type = MPCAP_PACKET_TYPE_ip,
 * };
 * </pre></code>
 *
 * To add a buffer:
 *
 *  <code><pre>mpcap_add_buffer (&mpcap, vm, pi0, 128);</pre></code>
 *
 * File will be written after @c n_packets_to_capture
 * or call to mpcap_close
 *
 */

/**
 * @brief Close a mapped pcap file
 *
 * Unmaps the capture file and, when the file was opened for writing,
 * trims it on disk to the number of bytes actually consumed in the
 * mapping. Does nothing if the file was never initialized.
 *
 * @param mpcap_main_t * pm
 * @return rc - clib_error_t (always 0; a failed truncate is only logged)
 *
 */
__clib_export clib_error_t *
mpcap_close (mpcap_main_t * pm)
{
  u64 bytes_used;

  /* Nothing is mapped unless init completed */
  if (!(pm->flags & MPCAP_FLAG_INIT_DONE))
    return 0;

  /* Measure the consumed span before the pointers are reset below */
  bytes_used = pm->current_va - pm->file_baseva;

  (void) munmap (pm->file_baseva, pm->max_file_size);
  pm->current_va = 0;
  pm->file_baseva = 0;
  pm->flags &= ~MPCAP_FLAG_INIT_DONE;

  /* Only a writer shrinks the file down to what was captured */
  if ((pm->flags & MPCAP_FLAG_WRITE_ENABLE)
      && truncate (pm->file_name, bytes_used) < 0)
    clib_unix_warning ("setting file size to %llu", bytes_used);

  return 0;
}

/**
 * @brief Initialize a mapped pcap file
 *
 * Creates (or truncates) pm->file_name, extends it to pm->max_file_size
 * rounded up to a whole number of pages, maps it shared read/write and
 * fills in the pcap file header at the start of the mapping.
 * A call made while MPCAP_FLAG_INIT_DONE is already set is a no-op.
 *
 * @param mpcap_main_t * pm - capture state; file_name and max_file_size
 *        are given defaults when left unset (NULL / 0)
 * @return rc - clib_error_t, 0 on success. On failure after the file was
 *         created, the file is closed and unlinked.
 *
 */
__clib_export clib_error_t *
mpcap_init (mpcap_main_t * pm)
{
  mpcap_file_header_t *fh;
  clib_error_t *error;
  u64 page_size;
  u8 zero = 0;
  int fd;

  if (pm->flags & MPCAP_FLAG_INIT_DONE)
    return 0;

  if (!pm->file_name)
    pm->file_name = "/tmp/vppinfra.mpcap";

  if (pm->flags & MPCAP_FLAG_THREAD_SAFE)
    clib_spinlock_init (&pm->lock);

  fd = open (pm->file_name, O_CREAT | O_TRUNC | O_RDWR, 0664);
  if (fd < 0)
    {
      return clib_error_return_unix (0, "failed to create `%s'",
				     pm->file_name);
    }

  if (pm->max_file_size == 0ULL)
    pm->max_file_size = MPCAP_DEFAULT_FILE_SIZE;

  /*
   * Round up to a multiple of the page size. The mask has to be
   * (page_size - 1): masking with the page size itself clears a single
   * bit, which leaves unaligned sizes unaligned and can grow sizes that
   * were already aligned. Assumes the page size is a power of two.
   */
  page_size = (u64) clib_mem_get_page_size ();
  pm->max_file_size = (pm->max_file_size + page_size - 1) & ~(page_size - 1);

  /*
   * Set file size by writing one byte at the last offset.
   *
   * In every failure path below the error is built before the
   * close()/unlink() cleanup runs, so that errno still belongs to the
   * call that failed (the mmap path always did this; the seek and write
   * paths now match it).
   */
  if (lseek (fd, pm->max_file_size - 1, SEEK_SET) == (off_t) - 1)
    {
      error = clib_error_return_unix (0, "file size seek");
      goto fail;
    }

  if (write (fd, &zero, 1) != 1)
    {
      error = clib_error_return_unix (0, "file size write");
      goto fail;
    }

  pm->file_baseva = mmap (0, pm->max_file_size,
			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (pm->file_baseva == (u8 *) MAP_FAILED)
    {
      error = clib_error_return_unix (0, "mmap");
      goto fail;
    }
  /* The mapping stays valid after the descriptor is closed */
  (void) close (fd);

  pm->flags |= MPCAP_FLAG_INIT_DONE | MPCAP_FLAG_WRITE_ENABLE;
  pm->n_packets_captured = 0;
  pm->n_mpcap_data_written = 0;

  /* Initialize file header; packet data starts right after it */
  fh = pm->file_header = (mpcap_file_header_t *) pm->file_baseva;
  pm->current_va = pm->file_baseva + sizeof (*fh);

  fh->magic = 0xa1b2c3d4;
  fh->major_version = 2;
  fh->minor_version = 4;
  fh->time_zone = 0;
  fh->max_packet_size_in_bytes = 1 << 16;
  fh->packet_type = pm->packet_type;
  return 0;

fail:
  /* Do not leave a half-built capture file behind */
  close (fd);
  (void) unlink (pm->file_name);
  return error;
}


/**
 * @brief mmap a mapped pcap file, e.g. to read from another process
 * @param mpcap_main_t *pm
 * @return rc - clib_error_t
 */
clib_error_t *
mpcap_map (mpcap_main_t * pm)
{
  clib_error_t *error = 0;
  int fd = -1;
  mpcap_file_header_t *fh;
  mpcap_packet_header_t *ph;
  struct stat statb;
  u64 packets_read = 0;
  u32 min_packet_bytes = ~0;
  u32 max_packet_bytes = 0;

  fd = open (pm->file_name, O_RDONLY);
  if (fd < 0)
    {
      error = clib_error_return_unix (0, "open `%s'", pm->file_name);
      goto done;
    }

  if (fstat (fd, &statb) < 0)
    {
      error = clib_error_return_unix (0, "stat `%s'", pm->file_name);
      goto done;
    }

  if ((statb.st_mode & S_IFREG) == 0)
    {
      error = clib_error_return (0, "'%s' is not a regular file",
				 pm->file_name);
      goto done;
    }

  if (statb.st_size < sizeof (*fh) + sizeof (