aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/devices/tap/tap.c
blob: 3739561cc592a031c37fa63ff9986543cebf400b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
/*
 * teib.h: next-hop resolution
 *
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __TEIB_H__
#define __TEIB_H__

#include <vnet/ip/ip.h>

/**
 * Tunnel Endpoint Information Base.
 *
 * A TEIB entry represents the mapping between a peer on an interface in the overlay
 * and a next-hop address in the underlay.
 *  i.e. there's a multipoint tunnel providing the overlay (henace a peer on
 *   that tunnel) which is reachable via 'tunnel destination' address in the
 *   underlay.
 *
 * Such overlay to underlay mappings might be providied by a protocol like NHRP
 */
typedef struct teib_entry_t_ teib_entry_t;

/** accessors for the opaque struct */
extern u32 teib_entry_get_sw_if_index (const teib_entry_t * ne);
extern u32 teib_entry_get_fib_index (const teib_entry_t * ne);
extern const ip46_address_t *teib_entry_get_peer (const teib_entry_t * ne);
extern const fib_prefix_t *teib_entry_get_nh (const teib_entry_t * ne);
extern u8 *format_teib_entry (u8 * s, va_list * args);

/**
 * Create a new TEIB entry
 */
extern int teib_entry_add (u32 sw_if_index,
			   const ip46_address_t * peer,
			   u32 nh_table_id, const ip46_address_t * nh);

extern int teib_entry_del (u32 sw_if_index, const ip46_address_t * peer);

extern teib_entry_t *teib_entry_find (u32 sw_if_index,
				      const ip46_address_t * peer);
extern teib_entry_t *teib_entry_get (index_t nei);

extern void teib_entry_adj_stack (const teib_entry_t * ne, adj_index_t ai);

typedef walk_rc_t (*teib_walk_cb_t) (index_t nei, void *ctx);

extern void teib_walk (teib_walk_cb_t fn, void *ctx);
extern void teib_walk_itf (u32 sw_if_index, teib_walk_cb_t fn, void *ctx);

/**
 * Notifications for the creation and deletion of TEIB entries
 */
typedef void (*teib_entry_added_t) (const teib_entry_t * ne);
typedef void (*teib_entry_deleted_t) (const teib_entry_t * ne);

typedef struct teib_vft_t_
{
  teib_entry_added_t nv_added;
  teib_entry_deleted_t nv_deleted;
} teib_vft_t;

extern void teib_register (const teib_vft_t * vft);

#endif

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
>506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
#include <linux/if_tun.h>
#include <sys/ioctl.h>
#include <linux/virtio_net.h>
#include <linux/vhost.h>
#include <sys/eventfd.h>
#include <sched.h>

#include <linux/netlink.h>
#include <linux/rtnetlink.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/devices/netlink.h>
#include <vnet/devices/virtio/virtio.h>
#include <vnet/devices/tap/tap.h>

tap_main_t tap_main;

#define _IOCTL(fd,a,...) \
  if (ioctl (fd, a, __VA_ARGS__) < 0) \
    { \
      err = clib_error_return_unix (0, "ioctl(" #a ")"); \
      goto error; \
    }

static u32
virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
			u32 flags)
{
  /* nothing for now */
  //TODO On MTU change call vnet_netlink_set_if_mtu
  return 0;
}

static int
open_netns_fd (char *netns)
{
  u8 *s = 0;
  int fd;

  if (strncmp (netns, "pid:", 4) == 0)
    s = format (0, "/proc/%u/ns/net%c", atoi (netns + 4), 0);
  else if (netns[0] == '/')
    s = format (0, "%s%c", netns, 0);
  else
    s = format (0, "/var/run/netns/%s%c", netns, 0);

  fd = open ((char *) s, O_RDONLY);
  vec_free (s);
  return fd;
}

#define TAP_MAX_INSTANCE 1024

void
tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
{
  vnet_main_t *vnm = vnet_get_main ();
  vlib_thread_main_t *thm = vlib_get_thread_main ();
  virtio_main_t *vim = &virtio_main;
  tap_main_t *tm = &tap_main;
  vnet_sw_interface_t *sw;
  vnet_hw_interface_t *hw;
  int i;
  int old_netns_fd = -1;
  struct ifreq ifr;
  size_t hdrsz;
  struct vhost_memory *vhost_mem = 0;
  virtio_if_t *vif = 0;
  clib_error_t *err = 0;

  if (args->id != ~0)
    {
      if (clib_bitmap_get (tm->tap_ids, args->id))
	{
	  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
	  args->error = clib_error_return (0, "interface already exists");
	  return;
	}
    }
  else
    {
      args->id = clib_bitmap_first_clear (tm->tap_ids);
    }

  if (args->id > TAP_MAX_INSTANCE)
    {
      args->rv = VNET_API_ERROR_UNSPECIFIED;
      args->error = clib_error_return (0, "cannot find free interface id");
      return;
    }

  clib_memset (&ifr, 0, sizeof (ifr));
  pool_get (vim->interfaces, vif);
  vif->dev_instance = vif - vim->interfaces;
  vif->tap_fd = -1;
  vif->id = args->id;

  if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0)
    {
      args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
      args->error = clib_error_return_unix (0, "open '/dev/vhost-net'");
      goto error;
    }

  _IOCTL (vif->fd, VHOST_GET_FEATURES, &vif->remote_features);

  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)) == 0)
    {
      args->rv = VNET_API_ERROR_UNSUPPORTED;
      args->error = clib_error_return (0, "vhost-net backend doesn't support "
				       "VIRTIO_NET_F_MRG_RXBUF feature");
      goto error;
    }

  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) ==
      0)
    {
      args->rv = VNET_API_ERROR_UNSUPPORTED;
      args->error = clib_error_return (0, "vhost-net backend doesn't support "
				       "VIRTIO_RING_F_INDIRECT_DESC feature");
      goto error;
    }

  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) == 0)
    {
      args->rv = VNET_API_ERROR_UNSUPPORTED;
      args->error = clib_error_return (0, "vhost-net backend doesn't support "
				       "VIRTIO_F_VERSION_1 features");
      goto error;
    }

  vif->features |= VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
  vif->features |= VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
  vif->features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);

  virtio_set_net_hdr_size (vif);

  _IOCTL (vif->fd, VHOST_SET_FEATURES, &vif->features);

  if ((vif->tap_fd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
    {
      args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
      args->error = clib_error_return_unix (0, "open '/dev/net/tun'");
      goto error;
    }

  ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR;
  _IOCTL (vif->tap_fd, TUNSETIFF, (void *) &ifr);
  vif->ifindex = if_nametoindex (ifr.ifr_ifrn.ifrn_name);

  unsigned int offload = 0;
  hdrsz = sizeof (struct virtio_net_hdr_v1);
  if (args->tap_flags & TAP_FLAG_GSO)
    {
      offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
      vif->gso_enabled = 1;
    }
  else
    {
      vif->gso_enabled = 0;
    }

  _IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload);
  _IOCTL (vif->tap_fd, TUNSETVNETHDRSZ, &hdrsz);
  _IOCTL (vif->fd, VHOST_SET_OWNER, 0);

  /* if namespace is specified, all further netlink messages should be excuted
     after we change our net namespace */
  if (args->host_namespace)
    {
      int fd;
      int rc;
      old_netns_fd = open ("/proc/self/ns/net", O_RDONLY);
      if ((fd = open_netns_fd ((char *) args->host_namespace)) == -1)
	{
	  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
	  args->error = clib_error_return_unix (0, "open_netns_fd '%s'",
						args->host_namespace);
	  goto error;
	}
      args->error = vnet_netlink_set_link_netns (vif->ifindex, fd,
						 (char *) args->host_if_name);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
      rc = setns (fd, CLONE_NEWNET);
      close (fd);
      if (rc == -1)
	{
	  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
	  args->error = clib_error_return_unix (0, "setns '%s'",
						args->host_namespace);
	  goto error;
	}
      if ((vif->ifindex = if_nametoindex ((char *) args->host_if_name)) == 0)
	{
	  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
	  args->error = clib_error_return_unix (0, "if_nametoindex '%s'",
						args->host_if_name);
	  goto error;
	}
    }
  else
    {
      if (args->host_if_name)
	{
	  args->error = vnet_netlink_set_link_name (vif->ifindex,
						    (char *)
						    args->host_if_name);
	  if (args->error)
	    {
	      args->rv = VNET_API_ERROR_NETLINK_ERROR;
	      goto error;
	    }
	}
    }

  if (!ethernet_mac_address_is_zero (args->host_mac_addr))
    {
      args->error = vnet_netlink_set_link_addr (vif->ifindex,
						args->host_mac_addr);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }

  if (args->host_bridge)
    {
      args->error = vnet_netlink_set_link_master (vif->ifindex,
						  (char *) args->host_bridge);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }


  if (args->host_ip4_prefix_len)
    {
      args->error = vnet_netlink_add_ip4_addr (vif->ifindex,
					       &args->host_ip4_addr,
					       args->host_ip4_prefix_len);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }

  if (args->host_ip6_prefix_len)
    {
      args->error = vnet_netlink_add_ip6_addr (vif->ifindex,
					       &args->host_ip6_addr,
					       args->host_ip6_prefix_len);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }

  args->error = vnet_netlink_set_link_state (vif->ifindex, 1 /* UP */ );
  if (args->error)
    {
      args->rv = VNET_API_ERROR_NETLINK_ERROR;
      goto error;
    }

  if (args->host_ip4_gw_set)
    {
      args->error = vnet_netlink_add_ip4_route (0, 0, &args->host_ip4_gw);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }

  if (args->host_ip6_gw_set)
    {
      args->error = vnet_netlink_add_ip6_route (0, 0, &args->host_ip6_gw);
      if (args->error)
	{
	  args->rv = VNET_API_ERROR_NETLINK_ERROR;
	  goto error;
	}
    }

  /* switch back to old net namespace */
  if (args->host_namespace)
    {
      if (setns (old_netns_fd, CLONE_NEWNET) == -1)
	{
	  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
	  args->error = clib_error_return_unix (0, "setns '%s'",
						args->host_namespace);
	  goto error;
	}
    }

  /* Set vhost memory table */
  i = sizeof (struct vhost_memory) + sizeof (struct vhost_memory_region);
  vhost_mem = clib_mem_alloc (i);
  clib_memset (vhost_mem, 0, i);
  vhost_mem->nregions = 1;
  vhost_mem->regions[0].memory_size = (1ULL << 47) - 4096;
  _IOCTL (vif->fd, VHOST_SET_MEM_TABLE, vhost_mem);

  if ((args->error = virtio_vring_init (vm, vif, 0, args->rx_ring_sz)))
    {
      args->rv = VNET_API_ERROR_INIT_FAILED;
      goto error;
    }

  if ((args->error = virtio_vring_init (vm, vif, 1, args->tx_ring_sz)))
    {
      args->rv = VNET_API_ERROR_INIT_FAILED;
      goto error;
    }

  if (!args->mac_addr_set)
    {
      f64 now = vlib_time_now (vm);
      u32 rnd;
      rnd = (u32) (now * 1e6);
      rnd = random_u32 (&rnd);

      memcpy (args->mac_addr + 2, &rnd, sizeof (rnd));
      args->mac_addr[0] = 2;
      args->mac_addr[1] = 0xfe;
    }
  vif->rx_ring_sz = args->rx_ring_sz != 0 ? args->rx_ring_sz : 256;
  vif->tx_ring_sz = args->tx_ring_sz != 0 ? args->tx_ring_sz : 256;
  clib_memcpy (vif->mac_addr, args->mac_addr, 6);

  vif->host_if_name = args->host_if_name;
  args->host_if_name = 0;
  vif->net_ns = args->host_namespace;
  args->host_namespace = 0;
  vif->host_bridge = args->host_bridge;
  args->host_bridge = 0;
  clib_memcpy (vif->host_mac_addr, args->host_mac_addr, 6);
  vif->host_ip4_prefix_len = args->host_ip4_prefix_len;
  vif->host_ip6_prefix_len = args->host_ip6_prefix_len;
  if (args->host_ip4_prefix_len)
    clib_memcpy (&vif->host_ip4_addr, &args->host_ip4_addr, 4);
  if (args->host_ip6_prefix_len)
    clib_memcpy (&vif->host_ip6_addr, &args->host_ip6_addr, 16);

  vif->type = VIRTIO_IF_TYPE_TAP;
  args->error = ethernet_register_interface (vnm, virtio_device_class.index,
					     vif->dev_instance,
					     vif->mac_addr,
					     &vif->hw_if_index,
					     virtio_eth_flag_change);
  if (args->error)
    {
      args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
      goto error;
    }

  tm->tap_ids = clib_bitmap_set (tm->tap_ids, vif->id, 1);
  sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
  vif->sw_if_index = sw->sw_if_index;
  args->sw_if_index = vif->sw_if_index;
  hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
  if (args->tap_flags & TAP_FLAG_GSO)
    {
      hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
      vnm->interface_main.gso_interface_count++;
    }
  vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
				    virtio_input_node.index);
  vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
  vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0,
				 VNET_HW_INTERFACE_RX_MODE_DEFAULT);
  vif->per_interface_next_index = ~0;
  virtio_vring_set_numa_node (vm, vif, 0);
  vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
  vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
			       VNET_HW_INTERFACE_FLAG_LINK_UP);
  if (thm->n_vlib_mains > 1)
    clib_spinlock_init (&vif->lockp);
  goto done;

error:
  if (err)
    {
      ASSERT (args->error == 0);
      args->error = err;
      args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
    }
  if (vif->tap_fd != -1)
    close (vif->tap_fd);
  if (vif->fd != -1)
    close (vif->fd);
  vec_foreach_index (i, vif->vrings) virtio_vring_free (vm, vif, i);
  vec_free (vif->vrings);
  clib_memset (vif, 0, sizeof (virtio_if_t));
  pool_put (vim->interfaces, vif);

done:
  if (vhost_mem)
    clib_mem_free (vhost_mem);
  if (old_netns_fd != -1)
    close (old_netns_fd);
}

int
tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
{
  vnet_main_t *vnm = vnet_get_main ();
  virtio_main_t *mm = &virtio_main;
  tap_main_t *tm = &tap_main;
  int i;
  virtio_if_t *vif;
  vnet_hw_interface_t *hw;

  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);

  if (vif->type != VIRTIO_IF_TYPE_TAP)
    return VNET_API_ERROR_INVALID_INTERFACE;

  /* decrement if this was a GSO interface */
  if (hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
    vnm->interface_main.gso_interface_count--;

  /* bring down the interface */
  vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
  vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0);
  vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0);

  ethernet_delete_interface (vnm, vif->hw_if_index);
  vif->hw_if_index = ~0;

  if (vif->tap_fd != -1)
    close (vif->tap_fd);
  if (vif->fd != -1)
    close (vif->fd);

  vec_foreach_index (i, vif->vrings) virtio_vring_free (vm, vif, i);
  vec_free (vif->vrings);

  tm->tap_ids = clib_bitmap_set (tm->tap_ids, vif->id, 0);
  clib_spinlock_free (&vif->lockp);
  clib_memset (vif, 0, sizeof (*vif));
  pool_put (mm->interfaces, vif);

  return 0;
}

int
tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable)
{
  vnet_main_t *vnm = vnet_get_main ();
  virtio_main_t *mm = &virtio_main;
  virtio_if_t *vif;
  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
  clib_error_t *err = 0;

  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);

  const unsigned int gso_on = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
  const unsigned int gso_off = 0;
  unsigned int offload = enable_disable ? gso_on : gso_off;
  _IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload);
  vif->gso_enabled = enable_disable ? 1 : 0;
  if (enable_disable)
    {
      if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0)
	{
	  vnm->interface_main.gso_interface_count++;
	  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
	}
    }
  else
    {
      if ((hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) != 0)
	{
	  vnm->interface_main.gso_interface_count--;
	  hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO;
	}
    }

error:
  if (err)
    {
      clib_warning ("Error %s gso on sw_if_index %d",
		    enable_disable ? "enabling" : "disabling", sw_if_index);
      return VNET_API_ERROR_SYSCALL_ERROR_3;
    }
  return 0;
}

int
tap_dump_ifs (tap_interface_details_t ** out_tapids)
{
  vnet_main_t *vnm = vnet_get_main ();
  virtio_main_t *mm = &virtio_main;
  virtio_if_t *vif;
  vnet_hw_interface_t *hi;
  tap_interface_details_t *r_tapids = NULL;
  tap_interface_details_t *tapid = NULL;

  /* *INDENT-OFF* */
  pool_foreach (vif, mm->interfaces,
    if (vif->type != VIRTIO_IF_TYPE_TAP)
      continue;
    vec_add2(r_tapids, tapid, 1);
    clib_memset (tapid, 0, sizeof (*tapid));
    tapid->id = vif->id;
    tapid->sw_if_index = vif->sw_if_index;
    hi = vnet_get_hw_interface (vnm, vif->hw_if_index);
    clib_memcpy(tapid->dev_name, hi->name,
                MIN (ARRAY_LEN (tapid->dev_name) - 1,
                     strlen ((const char *) hi->name)));
    tapid->rx_ring_sz = vif->rx_ring_sz;
    tapid->tx_ring_sz = vif->tx_ring_sz;
    clib_memcpy(tapid->host_mac_addr, vif->host_mac_addr, 6);
    if (vif->host_if_name)
      {
        clib_memcpy(tapid->host_if_name, vif->host_if_name,
                    MIN (ARRAY_LEN (tapid->host_if_name) - 1,
                    strlen ((const char *) vif->host_if_name)));
      }
    if (vif->net_ns)
      {
        clib_memcpy(tapid->host_namespace, vif->net_ns,
                    MIN (ARRAY_LEN (tapid->host_namespace) - 1,
                    strlen ((const char *) vif->net_ns)));
      }
    if (vif->host_bridge)
      {
        clib_memcpy(tapid->host_bridge, vif->host_bridge,
                    MIN (ARRAY_LEN (tapid->host_bridge) - 1,
                    strlen ((const char *) vif->host_bridge)));
      }
    if (vif->host_ip4_prefix_len)
      clib_memcpy(tapid->host_ip4_addr, &vif->host_ip4_addr, 4);
    tapid->host_ip4_prefix_len = vif->host_ip4_prefix_len;
    if (vif->host_ip6_prefix_len)
      clib_memcpy(tapid->host_ip6_addr, &vif->host_ip6_addr, 16);
    tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len;
  );
  /* *INDENT-ON* */

  *out_tapids = r_tapids;

  return 0;
}

static clib_error_t *
tap_init (vlib_main_t * vm)
{
  tap_main_t *tm = &tap_main;
  clib_error_t *error = 0;

  tm->log_default = vlib_log_register_class ("tap", 0);
  vlib_log_debug (tm->log_default, "initialized");

  return error;
}

VLIB_INIT_FUNCTION (tap_init);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */