aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2017-10-13 18:29:53 +0200
committer Damjan Marion <dmarion.lists@gmail.com> 2017-11-30 10:06:38 +0000
commit 8389fb9112bcf96def69539fa1de13a7a08923f5 (patch)
tree 74be33b7a0dac20e91ce6fc2fd0a30a1ffaa1752
parent 8de88c03056a3cca1f5eb9dacc53bbb391a407fe (diff)
virtio: fast TAP interfaces with vhost-net backend
Change-Id: Ided667356d5c6fb9648eb34685aabd6b16a598b7
Signed-off-by: Damjan Marion <damarion@cisco.com>
Signed-off-by: Steven Luong <sluong@cisco.com>
-rw-r--r--src/vat/api_format.c259
-rw-r--r--src/vnet.am12
-rw-r--r--src/vnet/api_errno.h4
-rw-r--r--src/vnet/devices/virtio/cli.c300
-rw-r--r--src/vnet/devices/virtio/device.c328
-rw-r--r--src/vnet/devices/virtio/node.c302
-rw-r--r--src/vnet/devices/virtio/tap.c361
-rw-r--r--src/vnet/devices/virtio/tap.h56
-rw-r--r--src/vnet/devices/virtio/tapv2.api94
-rw-r--r--src/vnet/devices/virtio/tapv2_api.c222
-rw-r--r--src/vnet/devices/virtio/virtio.c159
-rw-r--r--src/vnet/devices/virtio/virtio.h131
-rw-r--r--src/vnet/vnet_all_api_h.h1
-rw-r--r--src/vpp/api/custom_dump.c44
14 files changed, 2271 insertions, 2 deletions
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index cfdce0bec2e..2a56423265e 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -1694,6 +1694,74 @@ static void vl_api_tap_delete_reply_t_handler_json
vam->result_ready = 1;
}
+/*
+ * Plain-text reply handler for tap_create_v2.
+ * In async mode only failures (negative retval) are counted; otherwise the
+ * return value and the reported sw_if_index are stored and the result is
+ * flagged as ready.
+ */
+static void
+vl_api_tap_create_v2_reply_t_handler (vl_api_tap_create_v2_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+        vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->sw_if_index = ntohl (mp->sw_if_index);
+  /* set last, after the fields above have been filled in */
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON reply handler for tap_create_v2.
+ * Prints an object holding "retval" and "sw_if_index" to vam->ofp, then
+ * stores the return value and flags the result as ready.
+ */
+static void vl_api_tap_create_v2_reply_t_handler_json
+  (vl_api_tap_create_v2_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+  vat_json_object_add_uint (&reply, "sw_if_index", ntohl (mp->sw_if_index));
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
+/*
+ * Plain-text reply handler for tap_delete_v2.
+ * In async mode only failures (negative retval) are counted; otherwise the
+ * return value is stored and the result is flagged as ready.
+ */
+static void
+vl_api_tap_delete_v2_reply_t_handler (vl_api_tap_delete_v2_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+        vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON reply handler for tap_delete_v2.
+ * Prints an object holding "retval" to vam->ofp, then stores the return
+ * value and flags the result as ready.
+ */
+static void vl_api_tap_delete_v2_reply_t_handler_json
+  (vl_api_tap_delete_v2_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
static void vl_api_mpls_tunnel_add_del_reply_t_handler
(vl_api_mpls_tunnel_add_del_reply_t * mp)
{
@@ -5378,6 +5446,9 @@ _(TAP_CONNECT_REPLY, tap_connect_reply) \
_(TAP_MODIFY_REPLY, tap_modify_reply) \
_(TAP_DELETE_REPLY, tap_delete_reply) \
_(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details) \
+_(TAP_CREATE_V2_REPLY, tap_create_v2_reply) \
+_(TAP_DELETE_V2_REPLY, tap_delete_v2_reply) \
+_(SW_INTERFACE_TAP_V2_DETAILS, sw_interface_tap_v2_details) \
_(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \
_(IP_TABLE_ADD_DEL_REPLY, ip_table_add_del_reply) \
_(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply) \
@@ -7733,6 +7804,142 @@ api_tap_delete (vat_main_t * vam)
}
static int
+/*
+ * VAT command: tap_create_v2.
+ *
+ * Accepted arguments: "name <name>" (mandatory), "hw-addr <mac>",
+ * "host-ns <name>", "rx-ring-size <n>", "tx-ring-size <n>".
+ * Name and namespace are limited to 63 characters; ring sizes must be a
+ * power of two and at most 32768.
+ *
+ * Returns -99 on argument errors, otherwise the value produced by W ().
+ */
+api_tap_create_v2 (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_tap_create_v2_t *mp;
+  u8 mac_address[6];
+  u8 random_mac = 1;
+  u8 name_set = 0;
+  /* Vectors must start out NULL: vec_len ()/vec_add1 ()/vec_free () are
+   * applied to them even when the corresponding option was not given. */
+  u8 *tap_name = 0;
+  u8 *net_ns = 0;
+  u8 net_ns_set = 0;
+  int ret;
+  int rx_ring_sz = 0, tx_ring_sz = 0;
+
+  memset (mac_address, 0, sizeof (mac_address));
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "hw-addr %U", unformat_ethernet_address, mac_address))
+        {
+          random_mac = 0;
+        }
+      else if (unformat (i, "name %s", &tap_name))
+        name_set = 1;
+      else if (unformat (i, "host-ns %s", &net_ns))
+        net_ns_set = 1;
+      else if (unformat (i, "rx-ring-size %d", &rx_ring_sz))
+        ;
+      else if (unformat (i, "tx-ring-size %d", &tx_ring_sz))
+        ;
+      else
+        break;
+    }
+
+  if (name_set == 0)
+    {
+      errmsg ("missing tap name. ");
+      goto bad_args;
+    }
+  if (vec_len (tap_name) > 63)
+    {
+      errmsg ("tap name too long. ");
+      goto bad_args;
+    }
+  if (vec_len (net_ns) > 63)
+    {
+      errmsg ("host name space too long. ");
+      goto bad_args;
+    }
+  if (!is_pow2 (rx_ring_sz))
+    {
+      errmsg ("rx ring size must be power of 2. ");
+      goto bad_args;
+    }
+  if (rx_ring_sz > 32768)
+    {
+      errmsg ("rx ring size must be 32768 or lower. ");
+      goto bad_args;
+    }
+  if (!is_pow2 (tx_ring_sz))
+    {
+      errmsg ("tx ring size must be power of 2. ");
+      goto bad_args;
+    }
+  if (tx_ring_sz > 32768)
+    {
+      errmsg ("tx ring size must be 32768 or lower. ");
+      goto bad_args;
+    }
+
+  /* NUL-terminate both strings before copying them into the message */
+  vec_add1 (tap_name, 0);
+  vec_add1 (net_ns, 0);
+
+  /* Construct the API message */
+  M (TAP_CREATE_V2, mp);
+
+  mp->use_random_mac = random_mac;
+  clib_memcpy (mp->mac_address, mac_address, 6);
+  clib_memcpy (mp->tap_name, tap_name, vec_len (tap_name));
+  mp->net_ns_set = net_ns_set;
+  mp->rx_ring_sz = rx_ring_sz;
+  mp->tx_ring_sz = tx_ring_sz;
+  if (net_ns)
+    clib_memcpy (mp->net_ns, net_ns, vec_len (net_ns));
+
+  vec_free (tap_name);
+  vec_free (net_ns);
+
+  /* send it... */
+  S (mp);
+
+  /* Wait for a reply... */
+  W (ret);
+  return ret;
+
+bad_args:
+  /* release whatever the parser allocated before bailing out */
+  vec_free (tap_name);
+  vec_free (net_ns);
+  return -99;
+}
+
+/*
+ * VAT command: tap_delete_v2.
+ * Takes either an interface name or "sw_if_index <id>", sends
+ * TAP_DELETE_V2 and waits for the reply.
+ * Returns -99 when no interface was given, otherwise the value set by W ().
+ */
+static int
+api_tap_delete_v2 (vat_main_t * vam)
+{
+  unformat_input_t *line = vam->input;
+  vl_api_tap_delete_v2_t *mp;
+  u32 sw_if_index = ~0;
+  u8 have_if = 0;
+  int ret;
+
+  /* Consume interface specifications until something else shows up */
+  for (;;)
+    {
+      if (unformat_check_input (line) == UNFORMAT_END_OF_INPUT)
+        break;
+      if (!unformat (line, "%U", api_unformat_sw_if_index, vam, &sw_if_index)
+          && !unformat (line, "sw_if_index %d", &sw_if_index))
+        break;
+      have_if = 1;
+    }
+
+  if (!have_if)
+    {
+      errmsg ("missing vpp interface name. ");
+      return -99;
+    }
+
+  /* Build, send, and wait for the answer */
+  M (TAP_DELETE_V2, mp);
+  mp->sw_if_index = ntohl (sw_if_index);
+  S (mp);
+  W (ret);
+
+  return ret;
+}
+
+static int
api_ip_table_add_del (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -12199,6 +12406,53 @@ api_sw_interface_tap_dump (vat_main_t * vam)
return ret;
}
+/*
+ * Plain-text handler for one sw_interface_tap_v2_details message:
+ * prints the device name and the host-order sw_if_index on one line.
+ */
+static void vl_api_sw_interface_tap_v2_details_t_handler
+  (vl_api_sw_interface_tap_v2_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
+
+  print (vam->ofp, "%-16s %d", mp->dev_name, sw_if_index);
+}
+
+/*
+ * JSON handler for one sw_interface_tap_v2_details message.
+ * Appends an object with "sw_if_index" and "dev_name" to vam->json_tree,
+ * turning the tree into an array first if it is not one yet.
+ */
+static void vl_api_sw_interface_tap_v2_details_t_handler_json
+  (vl_api_sw_interface_tap_v2_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t *entry;
+
+  if (vam->json_tree.type != VAT_JSON_ARRAY)
+    {
+      /* first details message: the tree must still be untouched */
+      ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+      vat_json_init_array (&vam->json_tree);
+    }
+
+  entry = vat_json_array_add (&vam->json_tree);
+  vat_json_init_object (entry);
+  vat_json_object_add_uint (entry, "sw_if_index", ntohl (mp->sw_if_index));
+  vat_json_object_add_string_copy (entry, "dev_name", mp->dev_name);
+}
+
+/*
+ * VAT command: sw_interface_tap_v2_dump.
+ * Prints a column header, sends SW_INTERFACE_TAP_V2_DUMP followed by a
+ * CONTROL_PING, then waits; returns the value set by W ().
+ */
+static int
+api_sw_interface_tap_v2_dump (vat_main_t * vam)
+{
+ vl_api_sw_interface_tap_v2_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ print (vam->ofp, "\n%-16s %s", "dev_name", "sw_if_index");
+ /* Get list of tap interfaces */
+ M (SW_INTERFACE_TAP_V2_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ MPING (CONTROL_PING, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
static uword unformat_vxlan_decap_next
(unformat_input_t * input, va_list * args)
{
@@ -22375,6 +22629,11 @@ _(tap_modify, \
_(tap_delete, \
"<vpp-if-name> | sw_if_index <id>") \
_(sw_interface_tap_dump, "") \
+_(tap_create_v2, \
+ "name <name> [hw-addr <mac-addr>] [host-ns <name>] [rx-ring-size <num>] [tx-ring-size <num>]") \
+_(tap_delete_v2, \
+ "<vpp-if-name> | sw_if_index <id>") \
+_(sw_interface_tap_v2_dump, "") \
_(ip_table_add_del, \
"table-id <n> [ipv6]\n") \
_(ip_add_del_route, \
diff --git a/src/vnet.am b/src/vnet.am
index 72e67dcb298..00a6b231c7b 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -868,13 +868,23 @@ API_FILES += vnet/pg/pg.api
########################################
libvnet_la_SOURCES += \
+ vnet/devices/virtio/cli.c \
+ vnet/devices/virtio/device.c \
+ vnet/devices/virtio/node.c \
+ vnet/devices/virtio/tap.c \
+ vnet/devices/virtio/tapv2_api.c \
vnet/devices/virtio/vhost-user.c \
- vnet/devices/virtio/vhost_user_api.c
+ vnet/devices/virtio/vhost_user_api.c \
+ vnet/devices/virtio/virtio.c
+
nobase_include_HEADERS += \
+ vnet/devices/virtio/virtio.h \
+ vnet/devices/virtio/tapv2.api.h \
vnet/devices/virtio/vhost-user.h \
vnet/devices/virtio/vhost_user.api.h
+API_FILES += vnet/devices/virtio/tapv2.api
API_FILES += vnet/devices/virtio/vhost_user.api
########################################
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index e4ba8ddfb95..8c0cc0aba9b 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -132,7 +132,9 @@ _(NAME_SERVER_NO_ADDRESSES, -139, "No addresses available") \
_(NAME_SERVER_NEXT_SERVER, -140, "Retry with new server") \
_(APP_CONNECT_FILTERED, -141, "Connect was filtered") \
_(ACL_IN_USE_INBOUND, -142, "Inbound ACL in use") \
-_(ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use")
+_(ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use") \
+_(NAMESPACE_CREATE, -144, "Failed to create netlink namespace") \
+_(VIRTIO_INIT, -145, "Failed to init virtio ring")
typedef enum
{
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c
new file mode 100644
index 00000000000..0c1b75f8c4e
--- /dev/null
+++ b/src/vnet/devices/virtio/cli.c
@@ -0,0 +1,300 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <linux/virtio_net.h>
+#include <linux/vhost.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/tap.h>
+
+/*
+ * CLI handler for "create tap".
+ *
+ * Parses "name", "host-ns", "rx-ring-size", "tx-ring-size" and "hw-addr"
+ * from one line of input into a tap_create_if_args_t, calls
+ * tap_create_if () and maps its return code to a CLI error.
+ *
+ * Returns 0 on success or a clib_error_t describing the failure.
+ */
+static clib_error_t *
+tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  int rv;
+  tap_create_if_args_t args = { 0 };
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing name <interface>");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "name %s", &args.name))
+        ;
+      else if (unformat (line_input, "host-ns %s", &args.net_ns))
+        ;
+      else if (unformat (line_input, "rx-ring-size %d", &args.rx_ring_sz))
+        ;
+      else if (unformat (line_input, "tx-ring-size %d", &args.tx_ring_sz))
+        ;
+      else if (unformat (line_input, "hw-addr %U",
+                         unformat_ethernet_address, args.hw_addr))
+        args.hw_addr_set = 1;
+      else
+        {
+          /* Report the offending token from the line being parsed, then
+           * release the line and anything parsed so far. */
+          clib_error_t *error =
+            clib_error_return (0, "unknown input `%U'",
+                               format_unformat_error, line_input);
+          unformat_free (line_input);
+          vec_free (args.name);
+          vec_free (args.net_ns);
+          return error;
+        }
+    }
+  unformat_free (line_input);
+
+  rv = tap_create_if (vm, &args);
+
+  /* NOTE(review): args.net_ns is not freed here; confirm whether
+   * tap_create_if () takes ownership of it. */
+  vec_free (args.name);
+
+  if (rv == VNET_API_ERROR_SYSCALL_ERROR_1)
+    return clib_error_return_unix (0, "open '/dev/vhost-net'");
+  else if (rv == VNET_API_ERROR_SYSCALL_ERROR_2)
+    return clib_error_return_unix (0, "open '/dev/net/tun'");
+  else if (rv == VNET_API_ERROR_UNSUPPORTED)
+    return clib_error_return (0, "vhost-net backend doesn't support needed"
+                              " features");
+  else if (rv == VNET_API_ERROR_NAMESPACE_CREATE)
+    return clib_error_return (0, "failed to create netlink namespace");
+  else if (rv == VNET_API_ERROR_VIRTIO_INIT)
+    return clib_error_return (0, "failed to init virtio ring");
+  else if (rv == VNET_API_ERROR_INVALID_REGISTRATION)
+    return clib_error_return (0, "failed to register interface");
+  else if (rv != 0)
+    return clib_error_return (0, "error on creating tap interface");
+
+  return 0;
+}
+
+/* CLI registration for "create tap"; handled by tap_create_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tap_create_command, static) = {
+  .path = "create tap",
+  .short_help = "create tap {name <if-name>} [hw-addr <mac-address>] "
+    "[rx-ring-size <size>] [tx-ring-size <size>] [host-ns <netns>]",
+  .function = tap_create_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "delete tap".
+ *
+ * Accepts either "sw_if_index <n>" or an interface name, then calls
+ * tap_delete_if () and maps its return code to a CLI error.
+ *
+ * Returns 0 on success or a clib_error_t describing the failure.
+ */
+static clib_error_t *
+tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing <interface>");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+                         vnm, &sw_if_index))
+        ;
+      else
+        {
+          /* Report the offending token from the line being parsed and
+           * release the line before returning. */
+          clib_error_t *error =
+            clib_error_return (0, "unknown input `%U'",
+                               format_unformat_error, line_input);
+          unformat_free (line_input);
+          return error;
+        }
+    }
+  unformat_free (line_input);
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+                              "please specify interface name or sw_if_index");
+
+  rv = tap_delete_if (vm, sw_if_index);
+  if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX)
+    return clib_error_return (0, "not a tap interface");
+  else if (rv != 0)
+    return clib_error_return (0, "error on deleting tap interface");
+
+  return 0;
+}
+
+/* CLI registration for "delete tap"; handled by tap_delete_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tap_delete__command, static) =
+{
+ .path = "delete tap",
+ .short_help = "delete tap {<interface> | sw_if_index <sw_idx>}",
+ .function = tap_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "show tap".
+ *
+ * Optional arguments: any number of hardware interface names and the
+ * keyword "descriptors".  With no interface given, every entry of the
+ * virtio interface pool is shown.  For each interface the name, host
+ * namespace, flags, file descriptors, local and remote feature bits and
+ * per-ring state are printed; with "descriptors" the descriptor table of
+ * each ring is dumped as well.
+ *
+ * Returns 0 on success or a clib_error_t on unknown input.
+ */
+static clib_error_t *
+tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                     vlib_cli_command_t * cmd)
+{
+  virtio_main_t *mm = &virtio_main;
+  virtio_if_t *vif;
+  vnet_main_t *vnm = vnet_get_main ();
+  int show_descr = 0;
+  clib_error_t *error = 0;
+  u32 hw_if_index, *hw_if_indices = 0;
+  virtio_vring_t *vring;
+  int i, j;
+  struct feat_struct
+  {
+    u8 bit;
+    char *str;
+  };
+  struct feat_struct *feat_entry;
+
+  /* bit-number -> name table for virtio feature bits, NULL terminated */
+  static struct feat_struct feat_array[] = {
+#define _(s,b) { .str = #s, .bit = b, },
+    foreach_virtio_net_features
+#undef _
+    {.str = NULL}
+  };
+
+  /* bit-number -> name table for interface flags, NULL terminated */
+  struct feat_struct *flag_entry;
+  static struct feat_struct flags_array[] = {
+#define _(b,e,s) { .bit = b, .str = s, },
+    foreach_virtio_if_flag
+#undef _
+    {.str = NULL}
+  };
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+          (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+        vec_add1 (hw_if_indices, hw_if_index);
+      else if (unformat (input, "descriptors"))
+        show_descr = 1;
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  if (vec_len (hw_if_indices) == 0)
+    {
+      /* *INDENT-OFF* */
+      pool_foreach (vif, mm->interfaces,
+        vec_add1 (hw_if_indices, vif->hw_if_index);
+      );
+      /* *INDENT-ON* */
+    }
+
+  /* hw_if_index is reused here as the position in hw_if_indices */
+  for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++)
+    {
+      vnet_hw_interface_t *hi =
+        vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]);
+      vif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
+      vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name,
+                       vnm, vif->sw_if_index);
+      if (vif->name)
+        vlib_cli_output (vm, " name \"%s\"", vif->name);
+      if (vif->net_ns)
+        vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns);
+      vlib_cli_output (vm, " flags 0x%x", vif->flags);
+      flag_entry = (struct feat_struct *) &flags_array;
+      while (flag_entry->str)
+        {
+          if (vif->flags & (1ULL << flag_entry->bit))
+            vlib_cli_output (vm, " %s (%d)", flag_entry->str,
+                             flag_entry->bit);
+          flag_entry++;
+        }
+      vlib_cli_output (vm, " fd %d", vif->fd);
+      vlib_cli_output (vm, " tap-fd %d", vif->tap_fd);
+      vlib_cli_output (vm, " features 0x%lx", vif->features);
+      feat_entry = (struct feat_struct *) &feat_array;
+      while (feat_entry->str)
+        {
+          if (vif->features & (1ULL << feat_entry->bit))
+            vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+                             feat_entry->bit);
+          feat_entry++;
+        }
+      vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features);
+      feat_entry = (struct feat_struct *) &feat_array;
+      while (feat_entry->str)
+        {
+          if (vif->remote_features & (1ULL << feat_entry->bit))
+            vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+                             feat_entry->bit);
+          feat_entry++;
+        }
+      vec_foreach_index (i, vif->vrings)
+      {
+        // RX = 0, TX = 1
+        vring = vec_elt_at_index (vif->vrings, i);
+        vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX");
+        vlib_cli_output (vm, " qsz %d, last_used_idx %d, desc_in_use %d",
+                         vring->size, vring->last_used_idx,
+                         vring->desc_in_use);
+        vlib_cli_output (vm,
+                         " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d",
+                         vring->avail->flags, vring->avail->idx,
+                         vring->used->flags, vring->used->idx);
+        vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd,
+                         vring->call_fd);
+        if (show_descr)
+          {
+            vlib_cli_output (vm, "\n descriptor table:\n");
+            vlib_cli_output (vm,
+                             " id addr len flags next user_addr\n");
+            vlib_cli_output (vm,
+                             " ===== ================== ===== ====== ===== ==================\n");
+            /* Dump the ring selected by this iteration (vring), not
+             * always the first ring of the interface. */
+            for (j = 0; j < vring->size; j++)
+              {
+                struct vring_desc *desc = &vring->desc[j];
+                /* desc->addr is printed for both address columns */
+                vlib_cli_output (vm,
+                                 " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
+                                 j, desc->addr,
+                                 desc->len,
+                                 desc->flags, desc->next, desc->addr);
+              }
+          }
+      }
+    }
+done:
+  vec_free (hw_if_indices);
+  return error;
+}
+
+/* CLI registration for "show tap"; handled by tap_show_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tap_show_command, static) = {
+  .path = "show tap",
+  .short_help = "show tap [<interface>] [descriptors]",
+  .function = tap_show_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function registered through VLIB_INIT_FUNCTION; it performs no work
+ * and always returns 0 (no error).
+ */
+clib_error_t *
+tap_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
new file mode 100644
index 00000000000..275a3c74990
--- /dev/null
+++ b/src/vnet/devices/virtio/device.c
@@ -0,0 +1,328 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/virtio_net.h>
+#include <linux/vhost.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/virtio/virtio.h>
+
+/*
+ * TX error counters, kept as an X-macro list of (symbol, description).
+ * The list is expanded twice below: once into the TAP_TX_ERROR_* enum and
+ * once into the matching array of description strings, so both stay in
+ * the same order.
+ */
+#define foreach_virtio_tx_func_error \
+_(NO_FREE_SLOTS, "no free tx slots") \
+_(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \
+_(PENDING_MSGS, "pending msgs in tx ring") \
+_(NO_TX_QUEUES, "no tx queues")
+
+typedef enum
+{
+#define _(f,s) TAP_TX_ERROR_##f,
+ foreach_virtio_tx_func_error
+#undef _
+ TAP_TX_N_ERROR,
+} virtio_tx_func_error_t;
+
+/* Description strings, indexed by virtio_tx_func_error_t */
+static char *virtio_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_virtio_tx_func_error
+#undef _
+};
+
+/*
+ * Format callback producing the interface name for a virtio device
+ * instance: "tap-<name>" for TAP-type interfaces, "virtio<instance>"
+ * for everything else.
+ */
+u8 *
+format_virtio_device_name (u8 * s, va_list * args)
+{
+  u32 instance = va_arg (*args, u32);
+  virtio_main_t *vim = &virtio_main;
+  virtio_if_t *vif = pool_elt_at_index (vim->interfaces, instance);
+
+  if (vif->type == VIRTIO_IF_TYPE_TAP)
+    return format (s, "tap-%s", vif->name);
+
+  return format (s, "virtio%lu", vif->dev_instance);
+}
+
+/*
+ * Format callback describing a virtio device: always emits
+ * "VIRTIO interface" and, when verbose, an indented "instance <n>" line.
+ */
+static u8 *
+format_virtio_device (u8 * s, va_list * args)
+{
+  u32 instance = va_arg (*args, u32);
+  int verbose = va_arg (*args, int);
+  /* sampled before anything is appended to s */
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "VIRTIO interface");
+  if (!verbose)
+    return s;
+
+  return format (s, "\n%U instance %u", format_white_space, indent + 2,
+                 instance);
+}
+
+/* TX trace formatter placeholder: only emits "Unimplemented...". */
+static u8 *
+format_virtio_tx_trace (u8 * s, va_list * args)
+{
+  return format (s, "Unimplemented...");
+}
+
+/*
+ * Reclaim descriptors listed in the used ring since last_used_idx.
+ * For each used element the attached vlib buffer is freed (and, for an
+ * indirect descriptor, the descriptor table vector it points to), then
+ * desc_in_use and last_used_idx are updated in the vring.
+ */
+static_always_inline void
+virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring)
+{
+ u16 used = vring->desc_in_use;
+ u16 sz = vring->size;
+ /* NOTE(review): sz - 1 is only a valid index mask if the ring size is a power of two -- confirm */
+ u16 mask = sz - 1;
+ u16 last = vring->last_used_idx;
+ /* u16 arithmetic: number of entries added to the used ring since last */
+ u16 n_left = vring->used->idx - last;
+
+ if (n_left == 0)
+ return;
+
+ while (n_left)
+ {
+ struct vring_used_elem *e = &vring->used->ring[last & mask];
+ u16 slot = e->id;
+ struct vring_desc *d = &vring->desc[slot];
+
+ if (PREDICT_FALSE (d->flags & VRING_DESC_F_INDIRECT))
+ {
+ /* addr holds the pointer to the indirect descriptor table vector */
+ d = uword_to_pointer (d->addr, struct vring_desc *);
+ vec_free (d);
+ }
+
+ /* buffers[] holds the buffer index stored for this descriptor slot */
+ vlib_buffer_free (vm, &vring->buffers[slot], 1);
+ used--;
+ last++;
+ n_left--;
+ }
+ vring->desc_in_use = used;
+ vring->last_used_idx = last;
+}
+
+/*
+ * Fill descriptor slot `next` for buffer index `bi` and publish the slot
+ * in the avail ring at position `avail & mask`.
+ * A single-segment buffer is described directly; a chained buffer gets a
+ * freshly allocated table of descriptors referenced through one
+ * VRING_DESC_F_INDIRECT descriptor.
+ * Always returns 1 (the number of ring slots consumed).
+ */
+static_always_inline u16
+add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi,
+ u16 avail, u16 next, u16 mask)
+{
+ u16 n_added = 0;
+ const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
+ struct vring_desc *d;
+ d = &vring->desc[next];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
+ {
+ /* descriptor starts hdr_sz bytes before the packet data and covers them;
+ presumably space for the virtio net header -- TODO confirm the buffer
+ always has that much room in front of the data */
+ d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ d->len = b->current_length + hdr_sz;
+ d->flags = 0;
+ }
+ else
+ {
+ struct vring_desc *id, *descs = 0;
+
+ /* first buffer in chain */
+ vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES);
+ id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ id->len = b->current_length + hdr_sz;
+
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ /* link the current entry to the one about to be appended */
+ id->flags = VRING_DESC_F_NEXT;
+ id->next = vec_len (descs);
+ vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES);
+ b = vlib_get_buffer (vm, b->next_buffer);
+ id->addr = pointer_to_uword (vlib_buffer_get_current (b));
+ id->len = b->current_length;
+ }
+
+ /* ring descriptor points at the table built above */
+ d->addr = pointer_to_uword (descs);
+ d->len = vec_len (descs) * sizeof (struct vring_desc);
+ d->flags = VRING_DESC_F_INDIRECT;
+ }
+ /* remember the buffer index for this slot */
+ vring->buffers[next] = bi;
+ vring->avail->ring[avail & mask] = next;
+ n_added++;
+ return n_added;
+}
+
+
+/*
+ * Transmit the buffers of `frame` on the vring at index (qid << 1) + 1 of
+ * `vif`, with qid fixed to 0.
+ * Completed descriptors are reclaimed first, then buffers are enqueued
+ * while ring slots remain.  If anything was enqueued the avail index is
+ * published and kick_fd is written unless the used ring flags have
+ * VIRTIO_RING_FLAG_MASK_INT set.  Buffers that did not fit are counted as
+ * NO_FREE_SLOTS errors and freed.
+ * Returns the number of buffers enqueued.
+ */
+static_always_inline uword
+virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, virtio_if_t * vif)
+{
+ u8 qid = 0;
+ u16 n_left = frame->n_vectors;
+ virtio_vring_t *vring = vec_elt_at_index (vif->vrings, (qid << 1) + 1);
+ u16 used, next, avail;
+ u16 sz = vring->size;
+ u16 mask = sz - 1;
+ u32 *buffers = vlib_frame_args (frame);
+
+ /* free consumed buffers */
+ virtio_free_used_desc (vm, vring);
+
+ used = vring->desc_in_use;
+ next = vring->desc_next;
+ avail = vring->avail->idx;
+
+ /* enqueue until the frame is drained or every descriptor is in use */
+ while (n_left && used < sz)
+ {
+ u16 n_added;
+ n_added = add_buffer_to_slot (vm, vring, buffers[0], avail, next, mask);
+ avail += n_added;
+ next = (next + n_added) & mask;
+ used += n_added;
+ buffers++;
+ n_left--;
+ }
+
+ /* at least one buffer was enqueued */
+ if (n_left != frame->n_vectors)
+ {
+ /* store barrier placed before the new avail index is written */
+ CLIB_MEMORY_STORE_BARRIER ();
+ vring->avail->idx = avail;
+ vring->desc_next = next;
+ vring->desc_in_use = used;
+ if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0)
+ {
+ /* write an 8-byte value of 1 to kick_fd; the result is ignored */
+ u64 x = 1;
+ CLIB_UNUSED (int r) = write (vring->kick_fd, &x, sizeof (x));
+ }
+ }
+
+
+ /* leftovers did not fit in the ring: count and drop them */
+ if (n_left)
+ {
+ vlib_error_count (vm, node->node_index, TAP_TX_ERROR_NO_FREE_SLOTS,
+ n_left);
+ vlib_buffer_free (vm, buffers, n_left);
+ }
+
+ return frame->n_vectors - n_left;
+}
+
+/*
+ * Device-class TX function: looks up the virtio interface from the node's
+ * runtime data and hands the frame to virtio_interface_tx_inline ().
+ */
+static uword
+virtio_interface_tx (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+  virtio_main_t *vim = &virtio_main;
+  virtio_if_t *vif;
+
+  vif = pool_elt_at_index (vim->interfaces, rt->dev_instance);
+  return virtio_interface_tx_inline (vm, node, frame, vif);
+}
+
+/*
+ * rx_redirect_to_node callback.
+ * A node_index of ~0 switches redirection off by storing ~0 as the
+ * per-interface next index; any other value is added as a next node of
+ * virtio_input_node and the resulting next index is stored.
+ */
+static void
+virtio_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+                                u32 node_index)
+{
+  virtio_main_t *vim = &virtio_main;
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  virtio_if_t *vif = pool_elt_at_index (vim->interfaces, hi->dev_instance);
+
+  if (node_index == ~0)
+    /* Shut off redirection */
+    vif->per_interface_next_index = node_index;
+  else
+    vif->per_interface_next_index =
+      vlib_node_add_next (vlib_get_main (), virtio_input_node.index,
+                          node_index);
+}
+
+/* clear_counters callback of the device class; currently a no-op. */
+static void
+virtio_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+/*
+ * rx_mode_change callback.
+ * Polling mode sets VIRTIO_RING_FLAG_MASK_INT in the avail flags of the
+ * vring at index qid; every other mode clears it.  Always returns 0.
+ */
+static clib_error_t *
+virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
+                                 vnet_hw_interface_rx_mode mode)
+{
+  virtio_main_t *vim = &virtio_main;
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  virtio_if_t *vif = pool_elt_at_index (vim->interfaces, hi->dev_instance);
+  virtio_vring_t *ring = vec_elt_at_index (vif->vrings, qid);
+
+  if (mode != VNET_HW_INTERFACE_RX_MODE_POLLING)
+    ring->avail->flags &= ~VIRTIO_RING_FLAG_MASK_INT;
+  else
+    ring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT;
+
+  return 0;
+}
+
+/*
+ * admin_up_down callback: mirrors the software interface admin state into
+ * the VIRTIO_IF_FLAG_ADMIN_UP bit of the virtio interface flags.
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  virtio_main_t *mm = &virtio_main;
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+  else
+    vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
+
+  return 0;
+}
+
+/* subif_add_del callback of the device class; a no-op that returns 0. */
+static clib_error_t *
+virtio_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/*
+ * Device class registration for "virtio": wires the TX function, the
+ * format helpers, the TX error strings and the interface callbacks
+ * defined in this file into vnet.
+ */
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (virtio_device_class) = {
+ .name = "virtio",
+ .tx_function = virtio_interface_tx,
+ .format_device_name = format_virtio_device_name,
+ .format_device = format_virtio_device,
+ .format_tx_trace = format_virtio_tx_trace,
+ .tx_function_n_errors = TAP_TX_N_ERROR,
+ .tx_function_error_strings = virtio_tx_func_error_strings,
+ .rx_redirect_to_node = virtio_set_interface_next_node,
+ .clear_counters = virtio_clear_hw_interface_counters,
+ .admin_up_down_function = virtio_interface_admin_up_down,
+ .subif_add_del_function = virtio_subif_add_del_function,
+ .rx_mode_change_function = virtio_interface_rx_mode_change,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH(virtio_device_class,
+ virtio_interface_tx)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
new file mode 100644
index 00000000000..f746ada7326
--- /dev/null
+++ b/src/vnet/devices/virtio/node.c
@@ -0,0 +1,302 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <sys/ioctl.h>
+#include <linux/virtio_net.h>
+#include <linux/vhost.h>
+#include <sys/eventfd.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/devices/virtio/virtio.h>
+
+
+/*
+ * X-macro list of virtio-input node errors: _(SYMBOL, "description").
+ * It is expanded twice below, once into the error enum and once into the
+ * matching string table, so both always stay in the same order.
+ */
+#define foreach_virtio_input_error \
+ _(UNKNOWN, "unknown")
+
+/* Error codes of the virtio-input node (TAP_INPUT_ERROR_<SYMBOL>). */
+typedef enum
+{
+#define _(f,s) TAP_INPUT_ERROR_##f,
+ foreach_virtio_input_error
+#undef _
+ TAP_INPUT_N_ERROR,
+} virtio_input_error_t;
+
+/* Human-readable strings, indexed by virtio_input_error_t. */
+static char *virtio_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_virtio_input_error
+#undef _
+};
+
+/* Per-packet trace record captured by the virtio-input node. */
+typedef struct
+{
+ u32 next_index; /* next node index chosen for the packet */
+ u32 hw_if_index; /* hardware interface the packet arrived on */
+ u16 ring; /* printed as "vring" by format_virtio_input_trace */
+ u16 len; /* length of the first buffer, virtio-net header excluded */
+ struct virtio_net_hdr_v1 hdr; /* copy of the virtio-net header */
+} virtio_input_trace_t;
+
+/*
+ * Trace formatter for the virtio-input node.
+ *
+ * Consumes (vlib_main_t *, vlib_node_t *, virtio_input_trace_t *) from the
+ * va_list and appends two lines to 's': a summary of the interface / next
+ * index / ring / length, followed by the decoded virtio-net header indented
+ * two columns deeper than the current indent.
+ */
+static u8 *
+format_virtio_input_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  virtio_input_trace_t *trace = va_arg (*args, virtio_input_trace_t *);
+  struct virtio_net_hdr_v1 *h = &trace->hdr;
+  /* sample the indent before anything is appended to 's' */
+  u32 hdr_indent = format_get_indent (s) + 2;
+
+  s = format (s, "virtio: hw_if_index %d next-index %d vring %u len %u",
+              trace->hw_if_index, trace->next_index, trace->ring,
+              trace->len);
+
+  return format (s, "\n%Uhdr: flags 0x%02x gso_type 0x%02x hdr_len %u "
+                 "gso_size %u csum_start %u csum_offset %u num_buffers %u",
+                 format_white_space, hdr_indent,
+                 h->flags, h->gso_type, h->hdr_len, h->gso_size,
+                 h->csum_start, h->csum_offset, h->num_buffers);
+}
+
+/*
+ * Refill an RX vring with freshly allocated vlib buffers.
+ *
+ * Allocates one buffer per free descriptor slot, fills in the descriptors,
+ * publishes them through the avail ring and, unless the used ring has
+ * VIRTIO_RING_FLAG_MASK_INT set, kicks the backend by writing to kick_fd.
+ * Nothing is done while fewer than 1/8 of the slots are free.
+ */
+static_always_inline void
+virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring)
+{
+ const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
+ u16 used, next, avail, n_slots, n_alloc;
+ u16 sz = vring->size;
+ u16 mask = sz - 1;
+ int i;
+
+ used = vring->desc_in_use;
+
+ /* batch refills: wait until at least sz / 8 slots are free */
+ if (sz - used < sz / 8)
+ return;
+
+ n_slots = sz - used;
+ next = vring->desc_next;
+ avail = vring->avail->idx;
+ /* buffer indices are written linearly from buffers[next] and may run
+ past index sz - 1 (virtio_vring_init sizes the vector at 2 * sz) */
+ n_alloc = vlib_buffer_alloc (vm, &vring->buffers[next], n_slots);
+
+ if (PREDICT_FALSE (n_alloc < n_slots))
+ n_slots = n_alloc;
+
+ /* copy the indices that spilled past the end back to the ring start */
+ i = next + n_slots - sz;
+ if (PREDICT_FALSE (i > 0))
+ clib_memcpy (vring->buffers, &vring->buffers[sz], i * sizeof (u32));
+
+ while (n_slots)
+ {
+ struct vring_desc *d = &vring->desc[next];;
+ vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
+ /* descriptor starts hdr_sz bytes before the buffer's current data, so
+ the virtio-net header is placed immediately in front of the packet */
+ d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ d->len = VLIB_BUFFER_DATA_SIZE + hdr_sz;
+ d->flags = VRING_DESC_F_WRITE;
+ vring->avail->ring[avail & mask] = next;
+ avail++;
+ next = (next + 1) & mask;
+ n_slots--;
+ used++;
+ }
+ /* descriptors and ring entries must be visible before the new avail idx */
+ CLIB_MEMORY_STORE_BARRIER ();
+ vring->avail->idx = avail;
+ vring->desc_next = next;
+ vring->desc_in_use = used;
+
+ /* notify the backend unless it asked not to be notified */
+ if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0)
+ {
+ u64 b = 1;
+ CLIB_UNUSED (int r) = write (vring->kick_fd, &b, sizeof (b));
+ }
+}
+
+/*
+ * Receive path for one virtio interface.
+ *
+ * Drains the used ring, turns every completed descriptor (or chain of
+ * descriptors when the virtio-net header reports num_buffers > 1) into a
+ * vlib buffer chain, enqueues it to the next node, updates the interface
+ * RX counters and finally refills the ring.
+ *
+ * @param vm     vlib main
+ * @param node   runtime of the virtio-input node
+ * @param frame  unused
+ * @param vif    interface to service
+ * @param qid    queue id handed in by the device-input infrastructure;
+ *               recorded in the packet trace
+ * @return number of packets received
+ *
+ * NOTE(review): the RX vring is always vrings[0], independent of qid.
+ */
+static_always_inline uword
+virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                            vlib_frame_t * frame, virtio_if_t * vif, u16 qid)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 thread_index = vlib_get_thread_index ();
+  uword n_trace = vlib_get_trace_count (vm, node);
+  virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0);
+  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+  const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
+  u32 *to_next = 0;
+  u32 n_rx_packets = 0;
+  u32 n_rx_bytes = 0;
+  u16 mask = vring->size - 1;
+  u16 last = vring->last_used_idx;
+  /* u16 arithmetic: wraps correctly when used->idx has rolled over */
+  u16 n_left = vring->used->idx - last;
+
+  if (n_left == 0)
+    goto refill;
+
+  while (n_left)
+    {
+      u32 n_left_to_next;
+      u32 next0 = next_index;
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left && n_left_to_next)
+        {
+          u16 num_buffers;
+          struct vring_used_elem *e = &vring->used->ring[last & mask];
+          struct virtio_net_hdr_v1 *hdr;
+          u16 slot = e->id;
+          /* the used length includes the virtio-net header */
+          u16 len = e->len - hdr_sz;
+          u32 bi0 = vring->buffers[slot];
+          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+          /* the header sits immediately in front of the packet data */
+          hdr = vlib_buffer_get_current (b0) - hdr_sz;
+          num_buffers = hdr->num_buffers;
+
+          b0->current_data = 0;
+          b0->current_length = len;
+          b0->total_length_not_including_first_buffer = 0;
+          b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+          vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index;
+          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+          /* if multisegment packet */
+          if (PREDICT_FALSE (num_buffers > 1))
+            {
+              vlib_buffer_t *pb, *cb;
+              pb = b0;
+              while (num_buffers > 1)
+                {
+                  last++;
+                  e = &vring->used->ring[last & mask];
+                  u32 cbi = vring->buffers[e->id];
+                  cb = vlib_get_buffer (vm, cbi);
+
+                  /* current buffer: follow-on segments carry no header, so
+                     their data starts where the descriptor started */
+                  cb->current_data = -hdr_sz;
+                  cb->current_length = e->len;
+
+                  /* previous buffer */
+                  pb->next_buffer = cbi;
+                  pb->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+                  /* first buffer */
+                  b0->total_length_not_including_first_buffer += e->len;
+
+                  pb = cb;
+                  vring->desc_in_use--;
+                  num_buffers--;
+                  n_left--;
+                }
+            }
+
+          if (PREDICT_FALSE (vif->per_interface_next_index != ~0))
+            next0 = vif->per_interface_next_index;
+          else
+            /* redirect if feature path enabled */
+            vnet_feature_start_device_input_x1 (vif->sw_if_index, &next0,
+                                                b0);
+          /* trace */
+          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+          if (PREDICT_FALSE (n_trace > 0))
+            {
+              virtio_input_trace_t *tr;
+              vlib_trace_buffer (vm, node, next0, b0,
+                                 /* follow_chain */ 0);
+              vlib_set_trace_count (vm, node, --n_trace);
+              tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->hw_if_index = vif->hw_if_index;
+              /* every field printed by the trace formatter must be set */
+              tr->ring = qid;
+              tr->len = len;
+              clib_memcpy (&tr->hdr, hdr, hdr_sz);
+            }
+
+          /* enqueue buffer */
+          to_next[0] = bi0;
+          vring->desc_in_use--;
+          to_next += 1;
+          n_left_to_next--;
+          n_left--;
+          last++;
+
+          /* enqueue */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+
+          /* next packet */
+          n_rx_packets++;
+          n_rx_bytes += len;
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vring->last_used_idx = last;
+
+  /* combined_sw_if_counters is indexed by software interface index */
+  vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
+                                   + VNET_INTERFACE_COUNTER_RX, thread_index,
+                                   vif->sw_if_index, n_rx_packets,
+                                   n_rx_bytes);
+
+refill:
+  virtio_refill_vring (vm, vring);
+
+  return n_rx_packets;
+}
+
+/*
+ * Node function of virtio-input.
+ *
+ * Walks every (device, queue) pair assigned to this node runtime and runs
+ * the receive path for each interface that is administratively up.
+ * Returns the total number of packets received.
+ */
+static uword
+virtio_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+  virtio_main_t *vim = &virtio_main;
+  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+  vnet_device_and_queue_t *dq;
+  u32 total_rx = 0;
+
+  foreach_device_and_queue (dq, rt->devices_and_queues)
+  {
+    virtio_if_t *vif = vec_elt_at_index (vim->interfaces, dq->dev_instance);
+
+    /* interfaces that are admin-down are not polled */
+    if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
+      total_rx += virtio_device_input_inline (vm, node, frame, vif,
+                                              dq->queue_id);
+  }
+
+  return total_rx;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registration of the "virtio-input" node: an input node that starts in
+ * interrupt state, shares its next nodes with "device-input" and uses the
+ * error strings and trace formatter defined above.
+ */
+VLIB_REGISTER_NODE (virtio_input_node) = {
+ .function = virtio_input_fn,
+ .name = "virtio-input",
+ .sibling_of = "device-input",
+ .format_trace = format_virtio_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = TAP_INPUT_N_ERROR,
+ .error_strings = virtio_input_error_strings,
+};
+
+/* Multiarch registration of the node function. */
+VLIB_NODE_FUNCTION_MULTIARCH (virtio_input_node, virtio_input_fn)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/tap.c b/src/vnet/devices/virtio/tap.c
new file mode 100644
index 00000000000..658ba6bfc68
--- /dev/null
+++ b/src/vnet/devices/virtio/tap.c
@@ -0,0 +1,361 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <sys/ioctl.h>
+#include <linux/virtio_net.h>
+#include <linux/vhost.h>
+#include <sys/eventfd.h>
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/tap.h>
+
+/*
+ * ioctl() wrapper: on failure stores a unix error naming the request in the
+ * caller's local 'err' and jumps to the caller's 'error' label. Both must
+ * exist in the calling function.
+ * NOTE(review): expands to a bare 'if' statement, so do not use it as the
+ * body of an unbraced if/else.
+ */
+#define _IOCTL(fd,a,...) \
+ if (ioctl (fd, a, __VA_ARGS__) < 0) \
+ { \
+ err = clib_error_return_unix (0, "ioctl(" #a ")"); \
+ goto error; \
+ }
+
+/*
+ * Ethernet flag-change callback passed to ethernet_register_interface().
+ * Flag changes are currently ignored; always returns 0.
+ */
+static u32
+virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
+                        u32 flags)
+{
+  u32 result = 0;
+
+  /* nothing for now */
+  return result;
+}
+
+
+/*
+ * Set a single link attribute on a Linux interface via rtnetlink.
+ *
+ * Builds an RTM_SETLINK request for 'ifindex' carrying one attribute of
+ * type 'rta_type' whose payload is the 'data_len' bytes at 'data', and sends
+ * it over a freshly opened NETLINK_ROUTE socket. The kernel reply is not
+ * read.
+ *
+ * @param ifindex   Linux interface index
+ * @param rta_type  attribute type (IFLA_*)
+ * @param data      attribute payload
+ * @param data_len  payload length in bytes; must fit the request buffer
+ * @return 0 on success, otherwise an error the caller owns
+ */
+clib_error_t *
+clib_netlink_set_if_attr (int ifindex, unsigned short rta_type, void *data,
+                          int data_len)
+{
+  clib_error_t *err = 0;
+  int sock = -1;
+  struct sockaddr_nl ra = { 0 };
+  struct
+  {
+    struct nlmsghdr nh;
+    struct ifinfomsg ifmsg;
+    char attrbuf[512];
+  } req;
+  struct rtattr *rta;
+
+  /* the attribute (header + payload) is written into req.attrbuf */
+  if (data_len < 0 || RTA_LENGTH (data_len) > sizeof (req.attrbuf))
+    return clib_error_return (0, "netlink attribute too large (%d bytes)",
+                              data_len);
+
+  memset (&req, 0, sizeof (req));
+  if ((sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1)
+    {
+      err = clib_error_return_unix (0, "socket(AF_NETLINK)");
+      goto error;
+    }
+
+  ra.nl_family = AF_NETLINK;
+  ra.nl_pid = getpid ();
+
+  if ((bind (sock, (struct sockaddr *) &ra, sizeof (ra))) == -1)
+    {
+      err = clib_error_return_unix (0, "bind");
+      goto error;
+    }
+
+  req.nh.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifinfomsg));
+  req.nh.nlmsg_flags = NLM_F_REQUEST;
+  req.nh.nlmsg_type = RTM_SETLINK;
+  req.ifmsg.ifi_family = AF_UNSPEC;
+  req.ifmsg.ifi_index = ifindex;
+  req.ifmsg.ifi_change = 0xffffffff;
+  /* the attribute follows the (aligned) netlink + ifinfo headers */
+  rta = (struct rtattr *) (((char *) &req) + NLMSG_ALIGN (req.nh.nlmsg_len));
+  rta->rta_type = rta_type;
+  rta->rta_len = RTA_LENGTH (data_len);
+  req.nh.nlmsg_len = NLMSG_ALIGN (req.nh.nlmsg_len) + RTA_LENGTH (data_len);
+  memcpy (RTA_DATA (rta), data, data_len);
+
+  if ((send (sock, &req, req.nh.nlmsg_len, 0)) == -1)
+    err = clib_error_return_unix (0, "send");
+
+error:
+  /* always release the socket: a leaked one stays bound to our nl_pid and
+     makes the bind() of the next call fail */
+  if (sock != -1)
+    close (sock);
+  return err;
+}
+
+/*
+ * Set the MTU of Linux interface 'ifindex' through an IFLA_MTU attribute.
+ * Returns 0 on success, otherwise the error from clib_netlink_set_if_attr.
+ */
+clib_error_t *
+clib_netlink_set_if_mtu (int ifindex, int mtu)
+{
+  return clib_netlink_set_if_attr (ifindex, IFLA_MTU, &mtu, sizeof (int));
+}
+
+/*
+ * Move Linux interface 'ifindex' into the named network namespace.
+ *
+ * Opens /var/run/netns/<net_ns> and passes the resulting descriptor to the
+ * kernel as an IFLA_NET_NS_FD attribute. Returns an error when the
+ * namespace file cannot be opened or the netlink request fails.
+ */
+clib_error_t *
+clib_netlink_set_if_namespace (int ifindex, char *net_ns)
+{
+  u8 *ns_path = format (0, "/var/run/netns/%s%c", net_ns, 0);
+  int ns_fd = open ((char *) ns_path, O_RDONLY);
+  clib_error_t *err;
+
+  vec_free (ns_path);
+
+  if (ns_fd == -1)
+    return clib_error_return (0, "namespace '%s' doesn't exist", net_ns);
+
+  err = clib_netlink_set_if_attr (ifindex, IFLA_NET_NS_FD, &ns_fd,
+                                  sizeof (int));
+  /* the descriptor is only needed for the duration of the request */
+  close (ns_fd);
+  return err;
+}
+
+/*
+ * Create a TAP interface backed by vhost-net.
+ *
+ * Opens /dev/vhost-net and /dev/net/tun, negotiates the required virtio
+ * features, optionally moves the Linux side into args->net_ns, sets up the
+ * RX (vring 0) and TX (vring 1) rings and registers an ethernet interface.
+ *
+ * @param vm    vlib main
+ * @param args  creation parameters. On success ownership of args->name and
+ *              args->net_ns moves to the interface (both are set to 0) and
+ *              args->sw_if_index holds the new interface. When
+ *              args->hw_addr_set is 0 a random MAC is written to
+ *              args->hw_addr. On failure args->name / args->net_ns remain
+ *              owned by the caller.
+ * @return 0 on success, otherwise a VNET_API_ERROR_* code. Any failing
+ *         ioctl is reported as VNET_API_ERROR_SYSCALL_ERROR_3.
+ */
+int
+tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_main_t *vim = &virtio_main;
+  vnet_sw_interface_t *sw;
+  vnet_hw_interface_t *hw;
+  int i;
+  clib_error_t *err = 0;
+  struct ifreq ifr;
+  /* TUNSETVNETHDRSZ reads an int through the pointer */
+  int hdrsz;
+  struct vhost_memory *vhost_mem = 0;
+  virtio_if_t *vif = 0;
+  int rv = 0;
+
+  memset (&ifr, 0, sizeof (ifr));
+  pool_get (vim->interfaces, vif);
+  vif->dev_instance = vif - vim->interfaces;
+  vif->tap_fd = -1;
+
+  if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0)
+    {
+      rv = VNET_API_ERROR_SYSCALL_ERROR_1;
+      goto error;
+    }
+
+  _IOCTL (vif->fd, VHOST_GET_FEATURES, &vif->remote_features);
+
+  /* the data path relies on all three of these features */
+  if ((vif->remote_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) == 0)
+    {
+      rv = VNET_API_ERROR_UNSUPPORTED;
+      goto error;
+    }
+
+  if ((vif->remote_features & (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) == 0)
+    {
+      rv = VNET_API_ERROR_UNSUPPORTED;
+      goto error;
+    }
+
+  if ((vif->remote_features & (1ULL << VIRTIO_F_VERSION_1)) == 0)
+    {
+      rv = VNET_API_ERROR_UNSUPPORTED;
+      goto error;
+    }
+
+  vif->features |= 1ULL << VIRTIO_NET_F_MRG_RXBUF;
+  vif->features |= 1ULL << VIRTIO_F_VERSION_1;
+  vif->features |= 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+
+  _IOCTL (vif->fd, VHOST_SET_FEATURES, &vif->features);
+
+  if ((vif->tap_fd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
+    {
+      rv = VNET_API_ERROR_SYSCALL_ERROR_2;
+      goto error;
+    }
+
+  ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR;
+  /* ifr was zeroed above; copying at most IF_NAMESIZE - 1 bytes keeps the
+     name NUL-terminated */
+  strncpy (ifr.ifr_ifrn.ifrn_name, (char *) args->name, IF_NAMESIZE - 1);
+  _IOCTL (vif->tap_fd, TUNSETIFF, (void *) &ifr);
+
+  vif->ifindex = if_nametoindex ((char *) args->name);
+
+  unsigned int offload = 0;
+  hdrsz = sizeof (struct virtio_net_hdr_v1);
+  _IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload);
+  _IOCTL (vif->tap_fd, TUNSETVNETHDRSZ, &hdrsz);
+  _IOCTL (vif->fd, VHOST_SET_OWNER, 0);
+
+  if (args->net_ns)
+    {
+      err = clib_netlink_set_if_namespace (vif->ifindex,
+                                           (char *) args->net_ns);
+      if (err)
+        {
+          rv = VNET_API_ERROR_NAMESPACE_CREATE;
+          goto error;
+        }
+    }
+
+  /* Set vhost memory table */
+  i = sizeof (struct vhost_memory) + sizeof (struct vhost_memory_region);
+  vhost_mem = clib_mem_alloc (i);
+  memset (vhost_mem, 0, i);
+  vhost_mem->nregions = 1;
+  vhost_mem->regions[0].memory_size = (1ULL << 47) - 4096;
+  _IOCTL (vif->fd, VHOST_SET_MEM_TABLE, vhost_mem);
+
+  if ((err = virtio_vring_init (vm, vif, 0, args->rx_ring_sz)))
+    {
+      rv = VNET_API_ERROR_VIRTIO_INIT;
+      goto error;
+    }
+
+  if ((err = virtio_vring_init (vm, vif, 1, args->tx_ring_sz)))
+    {
+      rv = VNET_API_ERROR_VIRTIO_INIT;
+      goto error;
+    }
+
+  if (!args->hw_addr_set)
+    {
+      f64 now = vlib_time_now (vm);
+      u32 rnd;
+      rnd = (u32) (now * 1e6);
+      rnd = random_u32 (&rnd);
+
+      /* 02:fe:xx:xx:xx:xx - locally administered, unicast */
+      memcpy (args->hw_addr + 2, &rnd, sizeof (rnd));
+      args->hw_addr[0] = 2;
+      args->hw_addr[1] = 0xfe;
+    }
+  /* the interface takes over the strings before it is registered */
+  vif->name = args->name;
+  args->name = 0;
+  vif->net_ns = args->net_ns;
+  args->net_ns = 0;
+  err = ethernet_register_interface (vnm, virtio_device_class.index,
+                                     vif->dev_instance, args->hw_addr,
+                                     &vif->hw_if_index,
+                                     virtio_eth_flag_change);
+  if (err)
+    {
+      /* registration failed: hw_if_index is not valid, so give the strings
+         back to the caller and unwind instead of carrying on */
+      args->name = vif->name;
+      args->net_ns = vif->net_ns;
+      rv = VNET_API_ERROR_INVALID_REGISTRATION;
+      goto error;
+    }
+
+  sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
+  vif->sw_if_index = sw->sw_if_index;
+  args->sw_if_index = vif->sw_if_index;
+  hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
+  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+  vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
+                                    virtio_input_node.index);
+  vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
+  vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0,
+                                 VNET_HW_INTERFACE_RX_MODE_DEFAULT);
+  vif->per_interface_next_index = ~0;
+  vif->type = VIRTIO_IF_TYPE_TAP;
+  vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+  vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
+                               VNET_HW_INTERFACE_FLAG_LINK_UP);
+  goto done;
+
+error:
+  if (err)
+    {
+      /* the _IOCTL macro only sets 'err'; never report success from here */
+      if (rv == 0)
+        rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+      /* logs the error and releases it */
+      clib_error_report (err);
+    }
+  if (vif->tap_fd != -1)
+    close (vif->tap_fd);
+  if (vif->fd != -1)
+    close (vif->fd);
+  vec_foreach_index (i, vif->vrings) virtio_vring_free (vif, i);
+  vec_free (vif->vrings);
+  memset (vif, 0, sizeof (virtio_if_t));
+  pool_put (vim->interfaces, vif);
+
+done:
+  if (vhost_mem)
+    clib_mem_free (vhost_mem);
+
+  return rv;
+}
+
+/*
+ * Delete a TAP interface created by tap_create_if().
+ *
+ * Returns VNET_API_ERROR_INVALID_SW_IF_INDEX when sw_if_index does not
+ * resolve to a hardware interface of the virtio device class, 0 otherwise.
+ *
+ * NOTE(review): vif->name and vif->net_ns are cleared by the memset below
+ * without being freed - confirm who owns these strings.
+ */
+int
+tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_main_t *mm = &virtio_main;
+ int i;
+ virtio_if_t *vif;
+ vnet_hw_interface_t *hw;
+
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+ vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0);
+
+ ethernet_delete_interface (vnm, vif->hw_if_index);
+ vif->hw_if_index = ~0;
+
+ /* close the tap and vhost-net descriptors */
+ if (vif->tap_fd != -1)
+ close (vif->tap_fd);
+ if (vif->fd != -1)
+ close (vif->fd);
+
+ /* release every ring, then the ring vector itself */
+ vec_foreach_index (i, vif->vrings) virtio_vring_free (vif, i);
+ vec_free (vif->vrings);
+
+ /* scrub the element before returning it to the pool */
+ memset (vif, 0, sizeof (*vif));
+ pool_put (mm->interfaces, vif);
+
+ return 0;
+}
+
+/*
+ * Collect details of every interface in the virtio interface pool.
+ *
+ * @param out_tapids  receives a newly allocated vector with one entry per
+ *                    interface (NULL when the pool is empty); the caller
+ *                    frees it with vec_free()
+ * @return always 0
+ */
+int
+tap_dump_ifs (tap_interface_details_t ** out_tapids)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_main_t *mm = &virtio_main;
+  virtio_if_t *vif;
+  vnet_hw_interface_t *hi;
+  tap_interface_details_t *r_tapids = NULL;
+  tap_interface_details_t *tapid = NULL;
+
+  /*
+   * hi->name is a vppinfra vector and is not guaranteed to be
+   * NUL-terminated, so its length comes from vec_len(), not strlen().
+   * dev_name is zeroed first and at most ARRAY_LEN - 1 bytes are copied,
+   * which keeps the result NUL-terminated.
+   */
+  /* *INDENT-OFF* */
+  pool_foreach (vif, mm->interfaces,
+    vec_add2(r_tapids, tapid, 1);
+    memset (tapid, 0, sizeof (*tapid));
+    tapid->sw_if_index = vif->sw_if_index;
+    hi = vnet_get_hw_interface (vnm, vif->hw_if_index);
+    clib_memcpy(tapid->dev_name, hi->name,
+                MIN (ARRAY_LEN (tapid->dev_name) - 1,
+                     vec_len (hi->name)));
+  );
+  /* *INDENT-ON* */
+
+  *out_tapids = r_tapids;
+
+  return 0;
+}
+
+/*
+ * Init function of the tap module; there is nothing to set up, so it only
+ * reports success.
+ */
+static clib_error_t *
+tap_init (vlib_main_t * vm)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+VLIB_INIT_FUNCTION (tap_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/tap.h b/src/vnet/devices/virtio/tap.h
new file mode 100644
index 00000000000..58dcb5bda51
--- /dev/null
+++ b/src/vnet/devices/virtio/tap.h
@@ -0,0 +1,56 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _VNET_DEVICES_VIRTIO_TAP_H_
+#define _VNET_DEVICES_VIRTIO_TAP_H_
+
+/* Smaller of two values; note that each argument may be evaluated twice. */
+#ifndef MIN
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#endif
+
+/** Arguments for tap_create_if() */
+typedef struct
+{
+ u8 *name; /* NUL-terminated Linux tap device name */
+ u8 *net_ns; /* optional namespace name under /var/run/netns, 0 if none */
+ u8 hw_addr_set; /* non-zero: use hw_addr; zero: a random MAC is generated */
+ u8 hw_addr[6]; /* MAC address; filled in on return when generated */
+ u16 rx_ring_sz; /* RX ring entries, power of 2; 0 selects 256 */
+ u16 tx_ring_sz; /* TX ring entries, power of 2; 0 selects 256 */
+ /* return */
+ u32 sw_if_index; /* software interface index of the new interface */
+} tap_create_if_args_t;
+
+/** TAP interface details struct, one entry per interface from tap_dump_ifs() */
+typedef struct
+{
+ u32 sw_if_index; /* software interface index */
+ u8 dev_name[64]; /* NUL-terminated interface name, truncated to 63 bytes */
+} tap_interface_details_t;
+
+/* Create a tap interface; returns 0 or a VNET_API_ERROR_* code. */
+int tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args);
+/* Delete a tap interface; returns 0 or VNET_API_ERROR_INVALID_SW_IF_INDEX. */
+int tap_delete_if (vlib_main_t * vm, u32 sw_if_index);
+/* Return a vector describing all interfaces; the caller frees the vector. */
+int tap_dump_ifs (tap_interface_details_t ** out_tapids);
+
+#endif /* _VNET_DEVICES_VIRTIO_TAP_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/tapv2.api b/src/vnet/devices/virtio/tapv2.api
new file mode 100644
index 00000000000..e1592cf7d61
--- /dev/null
+++ b/src/vnet/devices/virtio/tapv2.api
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpe control-plane API messages for
+ the Linux kernel TAP device driver
+*/
+
+vl_api_version 1.0.0
+
+/** \brief Initialize a new tap interface with the given parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param use_random_mac - let the system generate a unique mac address
+ @param tap_name - name to associate with the new interface
+ @param mac_address - mac addr to assign to the interface if use_random_mac is not set
+ @param net_ns_set - set to 1 if net_ns is supplied
+ @param net_ns - name of the network namespace to move the tap device into
+ @param tx_ring_sz - the number of entries of TX ring
+ @param rx_ring_sz - the number of entries of RX ring
+*/
+define tap_create_v2
+{
+ u32 client_index;
+ u32 context;
+ u8 use_random_mac;
+ u8 tap_name[64];
+ u8 mac_address[6];
+ u8 net_ns_set;
+ u8 net_ns[64];
+ u16 tx_ring_sz; /* optional, default is 256 entries, must be power of 2 */
+ u16 rx_ring_sz; /* optional, default is 256 entries, must be power of 2 */
+};
+
+/** \brief Reply for tap create request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new tap interface
+*/
+define tap_create_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete tap interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of existing tap interface
+*/
+autoreply define tap_delete_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump tap interfaces request */
+define sw_interface_tap_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for tap dump request
+ @param sw_if_index - software index of tap interface
+ @param dev_name - Linux tap device name
+*/
+define sw_interface_tap_v2_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 dev_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/tapv2_api.c b/src/vnet/devices/virtio/tapv2_api.c
new file mode 100644
index 00000000000..1c559e6c28e
--- /dev/null
+++ b/src/vnet/devices/virtio/tapv2_api.c
@@ -0,0 +1,222 @@
+/*
+ *------------------------------------------------------------------
+ * tap_api.c - vnet tap device driver API support
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/devices/virtio/tap.h>
+
+/*
+ * X-macro list of the request messages handled in this file:
+ * _(MESSAGE_ID_SUFFIX, message_name). Expanded in tapv2_api_hookup().
+ */
+#define foreach_tapv2_api_msg \
+_(TAP_CREATE_V2, tap_create_v2) \
+_(TAP_DELETE_V2, tap_delete_v2) \
+_(SW_INTERFACE_TAP_V2_DUMP, sw_interface_tap_v2_dump)
+
+/*
+ * Handler for the tap_create_v2 API message.
+ *
+ * Translates the message into tap_create_if_args_t, creates the interface
+ * and, if the client is still connected, replies with the return value and
+ * the new sw_if_index.
+ *
+ * tap_create_if() stores the name / namespace pointers in the long-lived
+ * interface, so it is given heap copies rather than pointers into the API
+ * message, which does not outlive this handler.
+ *
+ * NOTE(review): rx_ring_sz / tx_ring_sz are copied without ntohs(); confirm
+ * which byte order clients use for these fields.
+ */
+static void
+vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  int rv;
+  vl_api_tap_create_v2_reply_t *rmp;
+  unix_shared_memory_queue_t *q;
+  tap_create_if_args_t _a, *ap = &_a;
+
+  memset (ap, 0, sizeof (*ap));
+
+  /* the fixed-size arrays are not guaranteed to be NUL-terminated */
+  mp->tap_name[ARRAY_LEN (mp->tap_name) - 1] = 0;
+  ap->name = format (0, "%s%c", mp->tap_name, 0);
+  if (!mp->use_random_mac)
+    {
+      clib_memcpy (ap->hw_addr, mp->mac_address, 6);
+      ap->hw_addr_set = 1;
+    }
+  ap->rx_ring_sz = mp->rx_ring_sz;
+  ap->tx_ring_sz = mp->tx_ring_sz;
+  ap->sw_if_index = (u32) ~ 0;
+  if (mp->net_ns_set)
+    {
+      mp->net_ns[ARRAY_LEN (mp->net_ns) - 1] = 0;
+      ap->net_ns = format (0, "%s%c", mp->net_ns, 0);
+    }
+
+  rv = tap_create_if (vm, ap);
+
+  /* when the interface took ownership the pointers were set to 0 and these
+     are no-ops; otherwise the copies are released here */
+  vec_free (ap->name);
+  vec_free (ap->net_ns);
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!q)
+    return;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_TAP_CREATE_V2_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = ntohl (rv);
+  rmp->sw_if_index = ntohl (ap->sw_if_index);
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * Send an SW_INTERFACE_EVENT with the 'deleted' flag set for sw_if_index to
+ * the client queue 'q'. Admin and link state are reported as down.
+ */
+static void
+tap_send_sw_interface_event_deleted (vpe_api_main_t * am,
+                                     unix_shared_memory_queue_t * q,
+                                     u32 sw_if_index)
+{
+  vl_api_sw_interface_event_t *ev = vl_msg_api_alloc (sizeof (*ev));
+
+  memset (ev, 0, sizeof (*ev));
+  ev->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+  ev->sw_if_index = ntohl (sw_if_index);
+
+  ev->deleted = 1;
+  ev->admin_up_down = 0;
+  ev->link_up_down = 0;
+
+  vl_msg_api_send_shmem (q, (u8 *) & ev);
+}
+
+/*
+ * Handler for the tap_delete_v2 API message.
+ *
+ * Deletes the interface, replies with the return value and, when the
+ * deletion succeeded, also sends an interface-deleted event to the same
+ * client. Nothing is sent if the client queue can no longer be found.
+ */
+static void
+vl_api_tap_delete_v2_t_handler (vl_api_tap_delete_v2_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vpe_api_main_t *vam = &vpe_api_main;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  unix_shared_memory_queue_t *q;
+  vl_api_tap_delete_v2_reply_t *reply;
+  int rv = tap_delete_if (vm, sw_if_index);
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->_vl_msg_id = ntohs (VL_API_TAP_DELETE_V2_REPLY);
+  reply->context = mp->context;
+  reply->retval = ntohl (rv);
+  vl_msg_api_send_shmem (q, (u8 *) & reply);
+
+  if (rv == 0)
+    tap_send_sw_interface_event_deleted (vam, q, sw_if_index);
+}
+
+/*
+ * Send one sw_interface_tap_v2_details message describing 'tap_if' to the
+ * client queue 'q', echoing the request 'context'.
+ */
+static void
+tap_send_sw_interface_details (vpe_api_main_t * am,
+                               unix_shared_memory_queue_t * q,
+                               tap_interface_details_t * tap_if, u32 context)
+{
+  vl_api_sw_interface_tap_v2_details_t *details;
+  size_t name_len;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  /* leave room for the terminating NUL already provided by the memset */
+  name_len = MIN (ARRAY_LEN (details->dev_name) - 1,
+                  strlen ((const char *) tap_if->dev_name));
+
+  details->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_TAP_V2_DETAILS);
+  details->context = context;
+  details->sw_if_index = ntohl (tap_if->sw_if_index);
+  clib_memcpy (details->dev_name, tap_if->dev_name, name_len);
+
+  vl_msg_api_send_shmem (q, (u8 *) & details);
+}
+
+/*
+ * Handler for the sw_interface_tap_v2_dump API message: sends one details
+ * message per interface returned by tap_dump_ifs() to the requesting
+ * client. Does nothing when the client queue is gone or the dump fails.
+ */
+static void
+vl_api_sw_interface_tap_v2_dump_t_handler (vl_api_sw_interface_tap_v2_dump_t *
+                                           mp)
+{
+  vpe_api_main_t *am = &vpe_api_main;
+  tap_interface_details_t *all_taps = NULL;
+  tap_interface_details_t *tap = NULL;
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!q)
+    return;
+
+  if (tap_dump_ifs (&all_taps) != 0)
+    return;
+
+  vec_foreach (tap, all_taps)
+    tap_send_sw_interface_details (am, q, tap, mp->context);
+
+  vec_free (all_taps);
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register the "<name>_<crc>" string of every tapv2 message with its
+ * message id, by expanding the foreach_vl_msg_name_crc_tapv2 list.
+ */
+static void
+tap_setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_tapv2;
+#undef _
+}
+
+/*
+ * API init function: installs the handler, endian and print functions for
+ * every message in foreach_tapv2_api_msg and then registers the
+ * message-name/CRC table. Always returns 0.
+ */
+static clib_error_t *
+tapv2_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+/* one vl_msg_api_set_handlers() call per listed message */
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_tapv2_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ tap_setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (tapv2_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
new file mode 100644
index 00000000000..63ca6011a9e
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio.c
@@ -0,0 +1,159 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <sys/ioctl.h>
+#include <linux/virtio_net.h>
+#include <linux/vhost.h>
+#include <sys/eventfd.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/virtio/virtio.h>
+
+virtio_main_t virtio_main;
+
+/*
+ * ioctl() wrapper: on failure stores a unix error naming the request in the
+ * caller's local 'err' and jumps to the caller's 'error' label. Both must
+ * exist in the calling function.
+ * NOTE(review): expands to a bare 'if' statement, so do not use it as the
+ * body of an unbraced if/else.
+ */
+#define _IOCTL(fd,a,...) \
+ if (ioctl (fd, a, __VA_ARGS__) < 0) \
+ { \
+ err = clib_error_return_unix (0, "ioctl(" #a ")"); \
+ goto error; \
+ }
+
+/*
+ * Read callback for a vring's call eventfd.
+ *
+ * private_data encodes (dev_instance << 16 | ring index). The eventfd is
+ * drained and, for even ring indices only, an RX interrupt is marked
+ * pending on the owning hardware interface. Always returns 0.
+ */
+static clib_error_t *
+call_read_ready (clib_file_t * uf)
+{
+  virtio_main_t *vim = &virtio_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  uword dev_instance = uf->private_data >> 16;
+  u16 qid = uf->private_data & 0xFFFF;
+  virtio_if_t *vif = vec_elt_at_index (vim->interfaces, dev_instance);
+  u64 counter;
+
+  /* drain the eventfd counter; the value itself is not used */
+  CLIB_UNUSED (ssize_t size) =
+    read (uf->file_descriptor, &counter, sizeof (counter));
+
+  if (!(qid & 1))
+    vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid);
+
+  return 0;
+}
+
+
+/*
+ * Allocate and register vring 'idx' of interface 'vif' with vhost.
+ *
+ * @param vm   vlib main
+ * @param vif  interface; vif->fd (vhost) and vif->tap_fd must be open
+ * @param idx  ring index within vif->vrings
+ * @param sz   number of entries: a power of 2 no larger than 32768, or 0
+ *             to select the default of 256
+ * @return 0 on success, otherwise an error the caller owns.
+ *
+ * When an ioctl fails, the memory, eventfds and file registration set up
+ * so far are left in place; the caller releases them with
+ * virtio_vring_free().
+ * NOTE(review): the eventfd() return values are not checked.
+ */
+clib_error_t *
+virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
+{
+ clib_error_t *err = 0;
+ virtio_vring_t *vring;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_vring_file file;
+ clib_file_t t = { 0 };
+ int i;
+
+ if (!is_pow2 (sz))
+ return clib_error_return (0, "ring size must be power of 2");
+
+ if (sz > 32768)
+ return clib_error_return (0, "ring size must be 32768 or lower");
+
+ /* 0 means "use the default" */
+ if (sz == 0)
+ sz = 256;
+
+ vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->vrings, idx);
+
+ /* descriptor table */
+ i = sizeof (struct vring_desc) * sz;
+ i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
+ vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
+ memset (vring->desc, 0, i);
+
+ /* available ring */
+ i = sizeof (struct vring_avail) + sz * sizeof (vring->avail->ring[0]);
+ i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
+ vring->avail = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
+ memset (vring->avail, 0, i);
+ // tell kernel that we don't need interrupt
+ vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+
+ /* used ring */
+ i = sizeof (struct vring_used) + sz * sizeof (struct vring_used_elem);
+ i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
+ vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
+ memset (vring->used, 0, i);
+
+ /* the buffer index vector is sized beyond sz entries (validated up to
+ index sz * 2) */
+ ASSERT (vring->buffers == 0);
+ vec_validate_aligned (vring->buffers, sz * 2, CLIB_CACHE_LINE_BYTES);
+
+ vring->size = sz;
+ vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
+ vring->kick_fd = eventfd (0, EFD_CLOEXEC);
+
+ /* have call_read_ready() invoked when call_fd becomes readable;
+ private_data packs the device instance and ring index */
+ t.read_function = call_read_ready;
+ t.file_descriptor = vring->call_fd;
+ t.private_data = vif->dev_instance << 16 | idx;
+ vring->call_file_index = clib_file_add (&file_main, &t);
+
+ state.index = idx;
+ state.num = sz;
+ _IOCTL (vif->fd, VHOST_SET_VRING_NUM, &state);
+
+ addr.index = idx;
+ addr.flags = 0;
+ addr.desc_user_addr = pointer_to_uword (vring->desc);
+ addr.avail_user_addr = pointer_to_uword (vring->avail);
+ addr.used_user_addr = pointer_to_uword (vring->used);
+ _IOCTL (vif->fd, VHOST_SET_VRING_ADDR, &addr);
+
+ /* the same vhost_vring_file is reused for kick, call and backend fds */
+ file.index = idx;
+ file.fd = vring->kick_fd;
+ _IOCTL (vif->fd, VHOST_SET_VRING_KICK, &file);
+ file.fd = vring->call_fd;
+ _IOCTL (vif->fd, VHOST_SET_VRING_CALL, &file);
+ file.fd = vif->tap_fd;
+ _IOCTL (vif->fd, VHOST_NET_SET_BACKEND, &file);
+
+error:
+ return err;
+}
+
+/*
+ * Release the resources of vring 'idx' of interface 'vif': the descriptor,
+ * avail and used memory, the call-fd file registration, both eventfds and
+ * the buffer index vector. Always returns 0.
+ */
+clib_error_t *
+virtio_vring_free (virtio_if_t * vif, u32 idx)
+{
+  //TODO free buffers and indirect descriptor allocs
+  virtio_vring_t *ring = vec_elt_at_index (vif->vrings, idx);
+
+  /* ring memory; each area may be absent */
+  if (ring->desc)
+    clib_mem_free (ring->desc);
+  if (ring->avail)
+    clib_mem_free (ring->avail);
+  if (ring->used)
+    clib_mem_free (ring->used);
+
+  /* stop watching the call fd before closing the eventfds */
+  clib_file_del_by_index (&file_main, ring->call_file_index);
+  close (ring->kick_fd);
+  close (ring->call_fd);
+
+  vec_free (ring->buffers);
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
new file mode 100644
index 00000000000..7dcd90ac353
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio.h
@@ -0,0 +1,131 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_H_
+#define _VNET_DEVICES_VIRTIO_VIRTIO_H_
+
+#define foreach_virtio_net_features /* X-macro list: _ (feature name, number); number is presumably the feature bit - confirm */ \
+ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \
+ _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \
+ _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \
+ _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \
+ _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. */ \
+ _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \
+ _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \
+ _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \
+ _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \
+ _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) /* Guest can announce device on the network */ \
+ _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \
+ _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \
+ _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \
+ _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \
+ _ (VIRTIO_F_ANY_LAYOUT, 27) /* Can the device handle any descriptor layout */ \
+ _ (VIRTIO_RING_F_INDIRECT_DESC, 28) /* Support indirect buffer descriptors */ \
+ _ (VIRTIO_RING_F_EVENT_IDX, 29) /* The Guest publishes the used index for which it expects an interrupt \
+ * at the end of the avail ring. Host should ignore the avail->flags field. */ \
+/* The Host publishes the avail index for which it expects a kick \
+ * at the end of the used ring. Guest should ignore the used->flags field. */ \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
+ _ (VIRTIO_F_VERSION_1, 32)
+
+#define foreach_virtio_if_flag /* X-macro list: _(bit position, NAME, string) */ \
+ _(0, ADMIN_UP, "admin-up") \
+ _(1, DELETING, "deleting")
+
+typedef enum /* one VIRTIO_IF_FLAG_<NAME> bit mask per list entry */
+{
+#define _(a, b, c) VIRTIO_IF_FLAG_##b = (1 << a), /* string c is unused here */
+ foreach_virtio_if_flag
+#undef _
+} virtio_if_flag_t;
+
+typedef enum /* kind of virtio interface; stored in virtio_if_t.type */
+{
+ VIRTIO_IF_TYPE_TAP,
+ VIRTIO_IF_N_TYPES, /* not a type: equals the number of types listed above */
+} virtio_if_type_t;
+
+
+typedef struct /* state of one ring; filled in by virtio_vring_init */
+{
+ struct vring_desc *desc; /* descriptor area, passed as desc_user_addr in VHOST_SET_VRING_ADDR */
+ struct vring_used *used; /* used area, passed as used_user_addr in VHOST_SET_VRING_ADDR */
+ struct vring_avail *avail; /* avail area, passed as avail_user_addr in VHOST_SET_VRING_ADDR */
+ u16 desc_in_use; /* NOTE(review): presumably descriptors currently posted - confirm in users */
+ u16 desc_next; /* NOTE(review): presumably next descriptor slot to use - confirm in users */
+ int kick_fd; /* eventfd handed to vhost with VHOST_SET_VRING_KICK */
+ int call_fd; /* non-blocking eventfd handed to vhost with VHOST_SET_VRING_CALL */
+ u16 size; /* ring size, the value given to VHOST_SET_VRING_NUM */
+#define VIRTIO_RING_FLAG_MASK_INT 1
+ u32 flags; /* presumably VIRTIO_RING_FLAG_* bits - confirm */
+ u32 call_file_index; /* index returned by clib_file_add for call_fd */
+ u32 *buffers; /* vector sized from the ring size at init; released with vec_free */
+ u16 last_used_idx; /* NOTE(review): presumably last consumed index of the used ring - confirm */
+} virtio_vring_t;
+
+typedef struct /* state of one virtio interface */
+{
+ u32 flags; /* presumably VIRTIO_IF_FLAG_* bits - confirm */
+ u32 dev_instance; /* combined with the ring idx into the call-fd private_data at ring init */
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 per_interface_next_index;
+ int fd; /* descriptor the VHOST_* ioctls are issued on */
+ int tap_fd; /* descriptor passed to VHOST_NET_SET_BACKEND */
+ virtio_vring_t *vrings; /* vector of rings, looked up by ring idx */
+
+ u64 features, remote_features;
+
+ virtio_if_type_t type;
+ u8 *name;
+ u8 *net_ns;
+ int ifindex;
+} virtio_if_t;
+
+typedef struct /* module-wide state; the single instance is virtio_main */
+{
+ virtio_if_t *interfaces; /* all interfaces - NOTE(review): pool vs. vector is not visible here */
+} virtio_main_t;
+
+extern virtio_main_t virtio_main; /* defined in another translation unit */
+extern vnet_device_class_t virtio_device_class;
+extern vlib_node_registration_t virtio_input_node;
+
+clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
+ u16 sz); /* sets up ring idx of vif with sz entries and registers it with vhost */
+clib_error_t *virtio_vring_free (virtio_if_t * vif, u32 idx); /* NOTE(review): idx is u32 here but u16 in virtio_vring_init */
+
+#endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h
index 6c7c63683f8..2e7419b77ce 100644
--- a/src/vnet/vnet_all_api_h.h
+++ b/src/vnet/vnet_all_api_h.h
@@ -32,6 +32,7 @@
#include <vnet/devices/af_packet/af_packet.api.h>
#include <vnet/devices/netmap/netmap.api.h>
#include <vnet/devices/virtio/vhost_user.api.h>
+#include <vnet/devices/virtio/tapv2.api.h>
#include <vnet/gre/gre.api.h>
#include <vnet/interface.api.h>
#include <vnet/map/map.api.h>
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
index f27acda24dd..428e1636569 100644
--- a/src/vpp/api/custom_dump.c
+++ b/src/vpp/api/custom_dump.c
@@ -547,6 +547,47 @@ static void *vl_api_sw_interface_tap_dump_t_print
FINISH;
}
+/*
+ * Render a tap_create_v2 request as a "SCRIPT:" command line.
+ *
+ * The name is always printed.  hw-addr, host-ns and the two ring sizes are
+ * printed only when the matching message field is non-zero (for the MAC
+ * address: not all zeroes; for host-ns: net_ns_set).
+ */
+static void *vl_api_tap_create_v2_t_print
+  (vl_api_tap_create_v2_t * mp, void *handle)
+{
+  u8 zero_mac[6] = { 0 };
+  u8 *s = format (0, "SCRIPT: tap_create_v2 ");
+
+  s = format (s, "name %s ", mp->tap_name);
+
+  /* An all-zero MAC address is left out of the output. */
+  if (memcmp (mp->mac_address, zero_mac, sizeof (zero_mac)) != 0)
+    {
+      s = format (s, "hw-addr %U ", format_ethernet_address, mp->mac_address);
+    }
+
+  if (mp->net_ns_set != 0)
+    {
+      s = format (s, "host-ns %s ", mp->net_ns);
+    }
+
+  if (mp->tx_ring_sz != 0)
+    {
+      s = format (s, "tx-ring-size %d ", mp->tx_ring_sz);
+    }
+
+  if (mp->rx_ring_sz != 0)
+    {
+      s = format (s, "rx-ring-size %d ", mp->rx_ring_sz);
+    }
+
+  FINISH;
+}
+
+/*
+ * Render a tap_delete_v2 request as a "SCRIPT:" command line.
+ * sw_if_index is passed through ntohl() before it is printed.
+ */
+static void *vl_api_tap_delete_v2_t_print
+  (vl_api_tap_delete_v2_t * mp, void *handle)
+{
+  u8 *s = format (0, "SCRIPT: tap_delete_v2 ");
+
+  s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+  FINISH;
+}
+
+/*
+ * Render a sw_interface_tap_v2_dump request as a "SCRIPT:" command line.
+ * No field of mp is read; the output is the fixed command name.
+ */
+static void *vl_api_sw_interface_tap_v2_dump_t_print
+  (vl_api_sw_interface_tap_v2_dump_t * mp, void *handle)
+{
+  u8 *s = format (0, "SCRIPT: sw_interface_tap_v2_dump ");
+
+  FINISH;
+}
static void *vl_api_ip_add_del_route_t_print
(vl_api_ip_add_del_route_t * mp, void *handle)
@@ -3269,6 +3310,9 @@ _(TAP_CONNECT, tap_connect) \
_(TAP_MODIFY, tap_modify) \
_(TAP_DELETE, tap_delete) \
_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) \
+_(TAP_CREATE_V2, tap_create_v2) \
+_(TAP_DELETE_V2, tap_delete_v2) \
+_(SW_INTERFACE_TAP_V2_DUMP, sw_interface_tap_v2_dump) \
_(IP_ADD_DEL_ROUTE, ip_add_del_route) \
_(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \
_(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \