diff options
author | Damjan Marion <damarion@cisco.com> | 2017-10-13 18:29:53 +0200 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-11-30 10:06:38 +0000 |
commit | 8389fb9112bcf96def69539fa1de13a7a08923f5 (patch) | |
tree | 74be33b7a0dac20e91ce6fc2fd0a30a1ffaa1752 /src/vnet/devices | |
parent | 8de88c03056a3cca1f5eb9dacc53bbb391a407fe (diff) |
virtio: fast TAP interfaces with vhost-net backend
Change-Id: Ided667356d5c6fb9648eb34685aabd6b16a598b7
Signed-off-by: Damjan Marion <damarion@cisco.com>
Signed-off-by: Steven Luong <sluong@cisco.com>
Diffstat (limited to 'src/vnet/devices')
-rw-r--r-- | src/vnet/devices/virtio/cli.c | 300 | ||||
-rw-r--r-- | src/vnet/devices/virtio/device.c | 328 | ||||
-rw-r--r-- | src/vnet/devices/virtio/node.c | 302 | ||||
-rw-r--r-- | src/vnet/devices/virtio/tap.c | 361 | ||||
-rw-r--r-- | src/vnet/devices/virtio/tap.h | 56 | ||||
-rw-r--r-- | src/vnet/devices/virtio/tapv2.api | 94 | ||||
-rw-r--r-- | src/vnet/devices/virtio/tapv2_api.c | 222 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.c | 159 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.h | 131 |
9 files changed, 1953 insertions, 0 deletions
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c new file mode 100644 index 00000000000..0c1b75f8c4e --- /dev/null +++ b/src/vnet/devices/virtio/cli.c @@ -0,0 +1,300 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <inttypes.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <linux/virtio_net.h> +#include <linux/vhost.h> +#include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/tap.h> + +static clib_error_t * +tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + int rv; + tap_create_if_args_t args = { 0 }; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing name <interface>"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &args.name)) + ; + else if (unformat (line_input, "host-ns %s", &args.net_ns)) + ; + else if (unformat (line_input, "rx-ring-size %d", &args.rx_ring_sz)) + ; + else if (unformat (line_input, "tx-ring-size %d", &args.tx_ring_sz)) + ; + else if (unformat (line_input, "hw-addr %U", + unformat_ethernet_address, args.hw_addr)) + args.hw_addr_set = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + rv = tap_create_if (vm, &args); + + vec_free (args.name); + + if (rv == VNET_API_ERROR_SYSCALL_ERROR_1) + return clib_error_return_unix (0, "open '/dev/vhost-net'"); + else if (rv == VNET_API_ERROR_SYSCALL_ERROR_2) + return clib_error_return_unix (0, "open '/dev/net/tun'"); + else if (rv == VNET_API_ERROR_UNSUPPORTED) + return clib_error_return (0, "vhost-net backend doesn't support needed" + " features"); + else if (rv == VNET_API_ERROR_NAMESPACE_CREATE) + return clib_error_return (0, "failed to create netlink namespace"); + else if (rv == VNET_API_ERROR_VIRTIO_INIT) + return clib_error_return (0, "failed to init virtio ring"); + else if (rv == VNET_API_ERROR_INVALID_REGISTRATION) + return clib_error_return (0, "failed to register interface"); + else if (rv != 0) + return clib_error_return (0, "error on creating tap interface"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (tap_create_command, static) = { + .path = "create tap", + .short_help = "create tap {name <if-name>} [hw-addr <mac-address>]" + "[rx-ring-size <size>] [tx-ring-size <size>] [host-ns <netns>]", + .function = tap_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + 
unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_main_t *vnm = vnet_get_main (); + int rv; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing <interface>"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + rv = tap_delete_if (vm, sw_if_index); + if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX) + return clib_error_return (0, "not a tap interface"); + else if (rv != 0) + return clib_error_return (0, "error on deleting tap interface"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (tap_delete__command, static) = +{ + .path = "delete tap", + .short_help = "delete tap {<interface> | sw_if_index <sw_idx>}", + .function = tap_delete_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + virtio_main_t *mm = &virtio_main; + virtio_if_t *vif; + vnet_main_t *vnm = vnet_get_main (); + int show_descr = 0; + clib_error_t *error = 0; + u32 hw_if_index, *hw_if_indices = 0; + virtio_vring_t *vring; + int i, j; + struct feat_struct + { + u8 bit; + char *str; + }; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_net_features +#undef _ + {.str = NULL} + }; + + struct feat_struct *flag_entry; + static struct feat_struct flags_array[] = { +#define _(b,e,s) { .bit = b, .str = s, }, + foreach_virtio_if_flag +#undef _ + {.str = NULL} + }; + + while 
(unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + vec_add1 (hw_if_indices, hw_if_index); + else if (unformat (input, "descriptors")) + show_descr = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (vec_len (hw_if_indices) == 0) + { + /* *INDENT-OFF* */ + pool_foreach (vif, mm->interfaces, + vec_add1 (hw_if_indices, vif->hw_if_index); + ); + /* *INDENT-ON* */ + } + + for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) + { + vnet_hw_interface_t *hi = + vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); + vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, + vnm, vif->sw_if_index); + if (vif->name) + vlib_cli_output (vm, " name \"%s\"", vif->name); + if (vif->net_ns) + vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); + vlib_cli_output (vm, " flags 0x%x", vif->flags); + flag_entry = (struct feat_struct *) &flags_array; + while (flag_entry->str) + { + if (vif->flags & (1ULL << flag_entry->bit)) + vlib_cli_output (vm, " %s (%d)", flag_entry->str, + flag_entry->bit); + flag_entry++; + } + vlib_cli_output (vm, " fd %d", vif->fd); + vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); + vlib_cli_output (vm, " features 0x%lx", vif->features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->remote_features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vec_foreach_index (i, vif->vrings) + { + // 
RX = 0, TX = 1 + vring = vec_elt_at_index (vif->vrings, i); + vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX"); + vlib_cli_output (vm, " qsz %d, last_used_idx %d, desc_in_use %d", + vring->size, vring->last_used_idx, + vring->desc_in_use); + vlib_cli_output (vm, + " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); + vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, + vring->call_fd); + if (show_descr) + { + vlib_cli_output (vm, "\n descriptor table:\n"); + vlib_cli_output (vm, + " id addr len flags next user_addr\n"); + vlib_cli_output (vm, + " ===== ================== ===== ====== ===== ==================\n"); + vring = vif->vrings; + for (j = 0; j < vring->size; j++) + { + struct vring_desc *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } + } + } + } +done: + vec_free (hw_if_indices); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (tap_show_command, static) = { + .path = "show tap", + .short_help = "show tap {<interface>] [descriptors]", + .function = tap_show_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +tap_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (tap_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c new file mode 100644 index 00000000000..275a3c74990 --- /dev/null +++ b/src/vnet/devices/virtio/device.c @@ -0,0 +1,328 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/virtio_net.h> +#include <linux/vhost.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/virtio/virtio.h> + +#define foreach_virtio_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \ +_(PENDING_MSGS, "pending msgs in tx ring") \ +_(NO_TX_QUEUES, "no tx queues") + +typedef enum +{ +#define _(f,s) TAP_TX_ERROR_##f, + foreach_virtio_tx_func_error +#undef _ + TAP_TX_N_ERROR, +} virtio_tx_func_error_t; + +static char *virtio_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_virtio_tx_func_error +#undef _ +}; + +u8 * +format_virtio_device_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + virtio_main_t *mm = &virtio_main; + virtio_if_t *vif = pool_elt_at_index (mm->interfaces, dev_instance); + + if (vif->type == VIRTIO_IF_TYPE_TAP) + { + s = format (s, "tap-%s", vif->name); + } + else + s = format (s, "virtio%lu", vif->dev_instance); + + return s; +} + +static u8 * +format_virtio_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + u32 indent = format_get_indent (s); + + s = format (s, "VIRTIO interface"); + if (verbose) + { + s = format (s, "\n%U instance %u", format_white_space, indent + 2, + dev_instance); + } + return s; +} + +static u8 * 
+format_virtio_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +static_always_inline void +virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) +{ + u16 used = vring->desc_in_use; + u16 sz = vring->size; + u16 mask = sz - 1; + u16 last = vring->last_used_idx; + u16 n_left = vring->used->idx - last; + + if (n_left == 0) + return; + + while (n_left) + { + struct vring_used_elem *e = &vring->used->ring[last & mask]; + u16 slot = e->id; + struct vring_desc *d = &vring->desc[slot]; + + if (PREDICT_FALSE (d->flags & VRING_DESC_F_INDIRECT)) + { + d = uword_to_pointer (d->addr, struct vring_desc *); + vec_free (d); + } + + vlib_buffer_free (vm, &vring->buffers[slot], 1); + used--; + last++; + n_left--; + } + vring->desc_in_use = used; + vring->last_used_idx = last; +} + +static_always_inline u16 +add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, + u16 avail, u16 next, u16 mask) +{ + u16 n_added = 0; + const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + struct vring_desc *d; + d = &vring->desc[next]; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) + { + d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + d->len = b->current_length + hdr_sz; + d->flags = 0; + } + else + { + struct vring_desc *id, *descs = 0; + + /* first buffer in chain */ + vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); + id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + id->len = b->current_length + hdr_sz; + + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = VRING_DESC_F_NEXT; + id->next = vec_len (descs); + vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = pointer_to_uword (vlib_buffer_get_current (b)); + id->len = b->current_length; + } + + d->addr = pointer_to_uword (descs); + d->len = vec_len (descs) * sizeof (struct vring_desc); + 
d->flags = VRING_DESC_F_INDIRECT; + } + vring->buffers[next] = bi; + vring->avail->ring[avail & mask] = next; + n_added++; + return n_added; +} + + +static_always_inline uword +virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, virtio_if_t * vif) +{ + u8 qid = 0; + u16 n_left = frame->n_vectors; + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, (qid << 1) + 1); + u16 used, next, avail; + u16 sz = vring->size; + u16 mask = sz - 1; + u32 *buffers = vlib_frame_args (frame); + + /* free consumed buffers */ + virtio_free_used_desc (vm, vring); + + used = vring->desc_in_use; + next = vring->desc_next; + avail = vring->avail->idx; + + while (n_left && used < sz) + { + u16 n_added; + n_added = add_buffer_to_slot (vm, vring, buffers[0], avail, next, mask); + avail += n_added; + next = (next + n_added) & mask; + used += n_added; + buffers++; + n_left--; + } + + if (n_left != frame->n_vectors) + { + CLIB_MEMORY_STORE_BARRIER (); + vring->avail->idx = avail; + vring->desc_next = next; + vring->desc_in_use = used; + if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) + { + u64 x = 1; + CLIB_UNUSED (int r) = write (vring->kick_fd, &x, sizeof (x)); + } + } + + + if (n_left) + { + vlib_error_count (vm, node->node_index, TAP_TX_ERROR_NO_FREE_SLOTS, + n_left); + vlib_buffer_free (vm, buffers, n_left); + } + + return frame->n_vectors - n_left; +} + +static uword +virtio_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + virtio_main_t *nm = &virtio_main; + vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; + virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance); + + return virtio_interface_tx_inline (vm, node, frame, vif); +} + +static void +virtio_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + virtio_main_t *apm = &virtio_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + virtio_if_t 
*vif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + vif->per_interface_next_index = node_index; + return; + } + + vif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), virtio_input_node.index, + node_index); +} + +static void +virtio_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, + vnet_hw_interface_rx_mode mode) +{ + virtio_main_t *mm = &virtio_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, qid); + + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT; + else + vring->avail->flags &= ~VIRTIO_RING_FLAG_MASK_INT; + + return 0; +} + +static clib_error_t * +virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + virtio_main_t *mm = &virtio_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + static clib_error_t *error = 0; + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; + else + vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP; + + return error; + return 0; +} + +static clib_error_t * +virtio_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (virtio_device_class) = { + .name = "virtio", + .tx_function = virtio_interface_tx, + .format_device_name = format_virtio_device_name, + .format_device = format_virtio_device, + .format_tx_trace = format_virtio_tx_trace, + .tx_function_n_errors = TAP_TX_N_ERROR, + .tx_function_error_strings = 
virtio_tx_func_error_strings, + .rx_redirect_to_node = virtio_set_interface_next_node, + .clear_counters = virtio_clear_hw_interface_counters, + .admin_up_down_function = virtio_interface_admin_up_down, + .subif_add_del_function = virtio_subif_add_del_function, + .rx_mode_change_function = virtio_interface_rx_mode_change, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH(virtio_device_class, + virtio_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c new file mode 100644 index 00000000000..f746ada7326 --- /dev/null +++ b/src/vnet/devices/virtio/node.c @@ -0,0 +1,302 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <sys/ioctl.h> +#include <linux/virtio_net.h> +#include <linux/vhost.h> +#include <sys/eventfd.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> +#include <vnet/devices/virtio/virtio.h> + + +#define foreach_virtio_input_error \ + _(UNKNOWN, "unknown") + +typedef enum +{ +#define _(f,s) TAP_INPUT_ERROR_##f, + foreach_virtio_input_error +#undef _ + TAP_INPUT_N_ERROR, +} virtio_input_error_t; + +static char *virtio_input_error_strings[] = { +#define _(n,s) s, + foreach_virtio_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + u16 ring; + u16 len; + struct virtio_net_hdr_v1 hdr; +} virtio_input_trace_t; + +static u8 * +format_virtio_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + virtio_input_trace_t *t = va_arg (*args, virtio_input_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "virtio: hw_if_index %d next-index %d vring %u len %u", + t->hw_if_index, t->next_index, t->ring, t->len); + s = format (s, "\n%Uhdr: flags 0x%02x gso_type 0x%02x hdr_len %u " + "gso_size %u csum_start %u csum_offset %u num_buffers %u", + format_white_space, indent + 2, + t->hdr.flags, t->hdr.gso_type, t->hdr.hdr_len, t->hdr.gso_size, + t->hdr.csum_start, t->hdr.csum_offset, t->hdr.num_buffers); + return s; +} + +static_always_inline void +virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) +{ + const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + u16 used, next, avail, n_slots, n_alloc; + u16 sz = vring->size; + u16 mask = sz - 1; + int i; + + used = vring->desc_in_use; + + if (sz - used 
< sz / 8) + return; + + n_slots = sz - used; + next = vring->desc_next; + avail = vring->avail->idx; + n_alloc = vlib_buffer_alloc (vm, &vring->buffers[next], n_slots); + + if (PREDICT_FALSE (n_alloc < n_slots)) + n_slots = n_alloc; + + i = next + n_slots - sz; + if (PREDICT_FALSE (i > 0)) + clib_memcpy (vring->buffers, &vring->buffers[sz], i * sizeof (u32)); + + while (n_slots) + { + struct vring_desc *d = &vring->desc[next];; + vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]); + d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + d->len = VLIB_BUFFER_DATA_SIZE + hdr_sz; + d->flags = VRING_DESC_F_WRITE; + vring->avail->ring[avail & mask] = next; + avail++; + next = (next + 1) & mask; + n_slots--; + used++; + } + CLIB_MEMORY_STORE_BARRIER (); + vring->avail->idx = avail; + vring->desc_next = next; + vring->desc_in_use = used; + + if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) + { + u64 b = 1; + CLIB_UNUSED (int r) = write (vring->kick_fd, &b, sizeof (b)); + } +} + +static_always_inline uword +virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, virtio_if_t * vif, u16 qid) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 thread_index = vlib_get_thread_index (); + uword n_trace = vlib_get_trace_count (vm, node); + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0); + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + u32 *to_next = 0; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u16 mask = vring->size - 1; + u16 last = vring->last_used_idx; + u16 n_left = vring->used->idx - last; + + if (n_left == 0) + goto refill; + + while (n_left) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left && n_left_to_next) + { + u16 num_buffers; + struct vring_used_elem *e = &vring->used->ring[last & mask]; + struct virtio_net_hdr_v1 *hdr; + 
u16 slot = e->id; + u16 len = e->len - hdr_sz; + u32 bi0 = vring->buffers[slot]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + hdr = vlib_buffer_get_current (b0) - hdr_sz; + num_buffers = hdr->num_buffers; + + b0->current_data = 0; + b0->current_length = len; + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = vif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* if multisegment packet */ + if (PREDICT_FALSE (num_buffers > 1)) + { + vlib_buffer_t *pb, *cb; + pb = b0; + while (num_buffers > 1) + { + last++; + e = &vring->used->ring[last & mask]; + u32 cbi = vring->buffers[e->id]; + cb = vlib_get_buffer (vm, cbi); + + /* current buffer */ + cb->current_data = -hdr_sz; + cb->current_length = e->len; + + /* previous buffer */ + pb->next_buffer = cbi; + pb->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* first buffer */ + b0->total_length_not_including_first_buffer += e->len; + + pb = cb; + vring->desc_in_use--; + num_buffers--; + n_left--; + } + } + + if (PREDICT_FALSE (vif->per_interface_next_index != ~0)) + next0 = vif->per_interface_next_index; + else + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (vif->sw_if_index, &next0, b0); + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + if (PREDICT_FALSE (n_trace > 0)) + { + virtio_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = vif->hw_if_index; + tr->len = len; + clib_memcpy (&tr->hdr, hdr, hdr_sz); + } + + /* enqueue buffer */ + to_next[0] = bi0; + vring->desc_in_use--; + to_next += 1; + n_left_to_next--; + n_left--; + last++; + + /* enqueue */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes 
+= len; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vring->last_used_idx = last; + + vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + vif->hw_if_index, n_rx_packets, + n_rx_bytes); + +refill: + virtio_refill_vring (vm, vring); + + return n_rx_packets; +} + +static uword +virtio_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_rx = 0; + virtio_main_t *nm = &virtio_main; + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; + + foreach_device_and_queue (dq, rt->devices_and_queues) + { + virtio_if_t *mif; + mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); + if (mif->flags & VIRTIO_IF_FLAG_ADMIN_UP) + { + n_rx += virtio_device_input_inline (vm, node, frame, mif, + dq->queue_id); + } + } + + return n_rx; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (virtio_input_node) = { + .function = virtio_input_fn, + .name = "virtio-input", + .sibling_of = "device-input", + .format_trace = format_virtio_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = TAP_INPUT_N_ERROR, + .error_strings = virtio_input_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (virtio_input_node, virtio_input_fn) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/tap.c b/src/vnet/devices/virtio/tap.c new file mode 100644 index 00000000000..658ba6bfc68 --- /dev/null +++ b/src/vnet/devices/virtio/tap.c @@ -0,0 +1,361 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <sys/ioctl.h> +#include <linux/virtio_net.h> +#include <linux/vhost.h> +#include <sys/eventfd.h> + +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/tap.h> + +#define _IOCTL(fd,a,...) \ + if (ioctl (fd, a, __VA_ARGS__) < 0) \ + { \ + err = clib_error_return_unix (0, "ioctl(" #a ")"); \ + goto error; \ + } + +static u32 +virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, + u32 flags) +{ + /* nothing for now */ + return 0; +} + + +clib_error_t * +clib_netlink_set_if_attr (int ifindex, unsigned short rta_type, void *data, + int data_len) +{ + clib_error_t *err = 0; + int sock; + struct sockaddr_nl ra = { 0 }; + struct + { + struct nlmsghdr nh; + struct ifinfomsg ifmsg; + char attrbuf[512]; + } req; + struct rtattr *rta; + + memset (&req, 0, sizeof (req)); + if ((sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) + { + err = clib_error_return_unix (0, "socket(AF_NETLINK)"); + goto error; + } + + ra.nl_family = AF_NETLINK; + ra.nl_pid = getpid (); + + if ((bind (sock, (struct sockaddr *) &ra, sizeof (ra))) == -1) + return clib_error_return_unix (0, "bind"); + + req.nh.nlmsg_len = NLMSG_LENGTH (sizeof (struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST; 
+ req.nh.nlmsg_type = RTM_SETLINK; + req.ifmsg.ifi_family = AF_UNSPEC; + req.ifmsg.ifi_index = ifindex; + req.ifmsg.ifi_change = 0xffffffff; + rta = (struct rtattr *) (((char *) &req) + NLMSG_ALIGN (req.nh.nlmsg_len)); + rta->rta_type = rta_type; + rta->rta_len = RTA_LENGTH (data_len); + req.nh.nlmsg_len = NLMSG_ALIGN (req.nh.nlmsg_len) + RTA_LENGTH (data_len); + memcpy (RTA_DATA (rta), data, data_len); + + if ((send (sock, &req, req.nh.nlmsg_len, 0)) == -1) + err = clib_error_return_unix (0, "send"); + +error: + return err; +} + +clib_error_t * +clib_netlink_set_if_mtu (int ifindex, int mtu) +{ + clib_error_t *err; + + err = clib_netlink_set_if_attr (ifindex, IFLA_MTU, &mtu, sizeof (int)); + return err; +} + +clib_error_t * +clib_netlink_set_if_namespace (int ifindex, char *net_ns) +{ + clib_error_t *err; + int ns_fd; + u8 *s; + s = format (0, "/var/run/netns/%s%c", net_ns, 0); + ns_fd = open ((char *) s, O_RDONLY); + vec_free (s); + if (ns_fd == -1) + return clib_error_return (0, "namespace '%s' doesn't exist", net_ns); + + err = + clib_netlink_set_if_attr (ifindex, IFLA_NET_NS_FD, &ns_fd, sizeof (int)); + close (ns_fd); + return err; +} + +int +tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vim = &virtio_main; + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hw; + int i; + clib_error_t *err = 0; + struct ifreq ifr; + size_t hdrsz; + struct vhost_memory *vhost_mem = 0; + virtio_if_t *vif = 0; + int rv = 0; + + memset (&ifr, 0, sizeof (ifr)); + pool_get (vim->interfaces, vif); + vif->dev_instance = vif - vim->interfaces; + vif->tap_fd = -1; + + if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + _IOCTL (vif->fd, VHOST_GET_FEATURES, &vif->remote_features); + + if ((vif->remote_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) == 0) + { + rv = VNET_API_ERROR_UNSUPPORTED; + goto error; + } + + if ((vif->remote_features & 
(1ULL << VIRTIO_RING_F_INDIRECT_DESC)) == 0) + { + rv = VNET_API_ERROR_UNSUPPORTED; + goto error; + } + + if ((vif->remote_features & (1ULL << VIRTIO_F_VERSION_1)) == 0) + { + rv = VNET_API_ERROR_UNSUPPORTED; + goto error; + } + + vif->features |= 1ULL << VIRTIO_NET_F_MRG_RXBUF; + vif->features |= 1ULL << VIRTIO_F_VERSION_1; + vif->features |= 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + + _IOCTL (vif->fd, VHOST_SET_FEATURES, &vif->features); + + if ((vif->tap_fd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR; + strncpy (ifr.ifr_ifrn.ifrn_name, (char *) args->name, IF_NAMESIZE); + _IOCTL (vif->tap_fd, TUNSETIFF, (void *) &ifr); + + vif->ifindex = if_nametoindex ((char *) args->name); + + unsigned int offload = 0; + hdrsz = sizeof (struct virtio_net_hdr_v1); + _IOCTL (vif->tap_fd, TUNSETOFFLOAD, offload); + _IOCTL (vif->tap_fd, TUNSETVNETHDRSZ, &hdrsz); + _IOCTL (vif->fd, VHOST_SET_OWNER, 0); + + if (args->net_ns) + { + err = clib_netlink_set_if_namespace (vif->ifindex, + (char *) args->net_ns); + if (err) + { + rv = VNET_API_ERROR_NAMESPACE_CREATE; + goto error; + } + } + + /* Set vhost memory table */ + i = sizeof (struct vhost_memory) + sizeof (struct vhost_memory_region); + vhost_mem = clib_mem_alloc (i); + memset (vhost_mem, 0, i); + vhost_mem->nregions = 1; + vhost_mem->regions[0].memory_size = (1ULL << 47) - 4096; + _IOCTL (vif->fd, VHOST_SET_MEM_TABLE, vhost_mem); + + if ((err = virtio_vring_init (vm, vif, 0, args->rx_ring_sz))) + { + rv = VNET_API_ERROR_VIRTIO_INIT; + goto error; + } + + if ((err = virtio_vring_init (vm, vif, 1, args->tx_ring_sz))) + { + rv = VNET_API_ERROR_VIRTIO_INIT; + goto error; + } + + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 
0xfe; + } + vif->name = args->name; + args->name = 0; + vif->net_ns = args->net_ns; + args->net_ns = 0; + err = ethernet_register_interface (vnm, virtio_device_class.index, + vif->dev_instance, args->hw_addr, + &vif->hw_if_index, + virtio_eth_flag_change); + if (err) + rv = VNET_API_ERROR_INVALID_REGISTRATION; + + sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index); + vif->sw_if_index = sw->sw_if_index; + args->sw_if_index = vif->sw_if_index; + hw = vnet_get_hw_interface (vnm, vif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, + virtio_input_node.index); + vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0); + vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0, + VNET_HW_INTERFACE_RX_MODE_DEFAULT); + vif->per_interface_next_index = ~0; + vif->type = VIRTIO_IF_TYPE_TAP; + vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + goto done; + +error: + if (vif->tap_fd != -1) + close (vif->tap_fd); + if (vif->fd != -1) + close (vif->fd); + vec_foreach_index (i, vif->vrings) virtio_vring_free (vif, i); + memset (vif, 0, sizeof (virtio_if_t)); + pool_put (vim->interfaces, vif); + +done: + if (vhost_mem) + clib_mem_free (vhost_mem); + + return rv; +} + +int +tap_delete_if (vlib_main_t * vm, u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *mm = &virtio_main; + int i; + virtio_if_t *vif; + vnet_hw_interface_t *hw; + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || virtio_device_class.index != hw->dev_class_index) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0); + + ethernet_delete_interface (vnm, vif->hw_if_index); + vif->hw_if_index = ~0; + 
+ if (vif->tap_fd != -1) + close (vif->tap_fd); + if (vif->fd != -1) + close (vif->fd); + + vec_foreach_index (i, vif->vrings) virtio_vring_free (vif, i); + vec_free (vif->vrings); + + memset (vif, 0, sizeof (*vif)); + pool_put (mm->interfaces, vif); + + return 0; +} + +int +tap_dump_ifs (tap_interface_details_t ** out_tapids) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *mm = &virtio_main; + virtio_if_t *vif; + vnet_hw_interface_t *hi; + tap_interface_details_t *r_tapids = NULL; + tap_interface_details_t *tapid = NULL; + + /* *INDENT-OFF* */ + pool_foreach (vif, mm->interfaces, + vec_add2(r_tapids, tapid, 1); + memset (tapid, 0, sizeof (*tapid)); + tapid->sw_if_index = vif->sw_if_index; + hi = vnet_get_hw_interface (vnm, vif->hw_if_index); + clib_memcpy(tapid->dev_name, hi->name, + MIN (ARRAY_LEN (tapid->dev_name) - 1, + strlen ((const char *) hi->name))); + ); + /* *INDENT-ON* */ + + *out_tapids = r_tapids; + + return 0; +} + +static clib_error_t * +tap_init (vlib_main_t * vm) +{ + + return 0; +} + +VLIB_INIT_FUNCTION (tap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/tap.h b/src/vnet/devices/virtio/tap.h new file mode 100644 index 00000000000..58dcb5bda51 --- /dev/null +++ b/src/vnet/devices/virtio/tap.h @@ -0,0 +1,56 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/** Arguments and result of tap_create_if(). */
typedef struct
{
  /* name given to the kernel TAP device (TUNSETIFF); tap_create_if() stores
     the pointer in the interface and clears this field */
  u8 *name;
  /* optional; when non-NULL the device is moved into the network namespace
     /var/run/netns/<net_ns>.  Stored in the interface like name. */
  u8 *net_ns;
  /* non-zero when hw_addr holds a caller supplied MAC address; when zero
     tap_create_if() writes a generated 02:fe:xx:xx:xx:xx address there */
  u8 hw_addr_set;
  u8 hw_addr[6];
  /* entries in vring 0 / vring 1; virtio_vring_init() requires a power of
     two no larger than 32768 and substitutes 256 for 0 */
  u16 rx_ring_sz;
  u16 tx_ring_sz;
  /* return */
  /* software interface index of the created interface */
  u32 sw_if_index;
} tap_create_if_args_t;
+ */ + +/** \file + + This file defines vpe control-plane API messages for + the Linux kernel TAP device driver +*/ + +vl_api_version 1.0.0 + +/** \brief Initialize a new tap interface with the given parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param use_random_mac - let the system generate a unique mac address + @param tap_name - name to associate with the new interface + @param mac_address - mac addr to assign to the interface if use_random_mac is not set + @param net_ns_set - non-zero when net_ns is provided + @param net_ns - name of the network namespace to move the interface into + @param tx_ring_sz - the number of entries of TX ring + @param rx_ring_sz - the number of entries of RX ring +*/ +define tap_create_v2 +{ + u32 client_index; + u32 context; + u8 use_random_mac; + u8 tap_name[64]; + u8 mac_address[6]; + u8 net_ns_set; + u8 net_ns[64]; + u16 tx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ + u16 rx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ +}; + +/** \brief Reply for tap create request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new tap interface +*/ +define tap_create_v2_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete tap interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of existing tap interface +*/ +autoreply define tap_delete_v2 +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Dump tap interfaces request */ +define sw_interface_tap_v2_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for tap dump request + @param sw_if_index - software index of tap interface + @param dev_name - Linux tap device name +*/ +define sw_interface_tap_v2_details +{ + u32 context; + u32 
sw_if_index; + u8 dev_name[64]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/tapv2_api.c b/src/vnet/devices/virtio/tapv2_api.c new file mode 100644 index 00000000000..1c559e6c28e --- /dev/null +++ b/src/vnet/devices/virtio/tapv2_api.c @@ -0,0 +1,222 @@ +/* + *------------------------------------------------------------------ + * tap_api.c - vnet tap device driver API support + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> +#include <vnet/devices/virtio/tap.h> + +#define foreach_tapv2_api_msg \ +_(TAP_CREATE_V2, tap_create_v2) \ +_(TAP_DELETE_V2, tap_delete_v2) \ +_(SW_INTERFACE_TAP_V2_DUMP, sw_interface_tap_v2_dump) + +static void +vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + int rv; + vl_api_tap_create_v2_reply_t *rmp; + unix_shared_memory_queue_t *q; + tap_create_if_args_t _a, *ap = &_a; + + memset (ap, 0, sizeof (*ap)); + + ap->name = mp->tap_name; + if (!mp->use_random_mac) + { + clib_memcpy (ap->hw_addr, mp->mac_address, 6); + ap->hw_addr_set = 1; + } + ap->rx_ring_sz = mp->rx_ring_sz; + ap->tx_ring_sz = mp->tx_ring_sz; + ap->sw_if_index = (u32) ~ 0; + if (mp->net_ns_set) + ap->net_ns = mp->net_ns; + + rv = tap_create_if (vm, ap); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_TAP_CREATE_V2_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->sw_if_index = ntohl (ap->sw_if_index); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +tap_send_sw_interface_event_deleted (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index) +{ + vl_api_sw_interface_event_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); + mp->sw_if_index = ntohl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_tap_delete_v2_t_handler (vl_api_tap_delete_v2_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + int rv; + vpe_api_main_t *vam = &vpe_api_main; + vl_api_tap_delete_v2_reply_t *rmp; + unix_shared_memory_queue_t *q; + u32 sw_if_index = ntohl 
(mp->sw_if_index); + + rv = tap_delete_if (vm, sw_if_index); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_TAP_DELETE_V2_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + + if (!rv) + tap_send_sw_interface_event_deleted (vam, q, sw_if_index); +} + +static void +tap_send_sw_interface_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + tap_interface_details_t * tap_if, u32 context) +{ + vl_api_sw_interface_tap_v2_details_t *mp; + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_TAP_V2_DETAILS); + mp->sw_if_index = ntohl (tap_if->sw_if_index); + clib_memcpy (mp->dev_name, tap_if->dev_name, + MIN (ARRAY_LEN (mp->dev_name) - 1, + strlen ((const char *) tap_if->dev_name))); + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_sw_interface_tap_v2_dump_t_handler (vl_api_sw_interface_tap_v2_dump_t * + mp) +{ + int rv; + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + tap_interface_details_t *tapifs = NULL; + tap_interface_details_t *tap_if = NULL; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + rv = tap_dump_ifs (&tapifs); + if (rv) + return; + + vec_foreach (tap_if, tapifs) + { + tap_send_sw_interface_details (am, q, tap_if, mp->context); + } + + vec_free (tapifs); +} + +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +tap_setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_tapv2; +#undef _ +} + +static clib_error_t * +tapv2_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + 
vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_tapv2_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + tap_setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (tapv2_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c new file mode 100644 index 00000000000..63ca6011a9e --- /dev/null +++ b/src/vnet/devices/virtio/virtio.c @@ -0,0 +1,159 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <sys/ioctl.h> +#include <linux/virtio_net.h> +#include <linux/vhost.h> +#include <sys/eventfd.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/virtio/virtio.h> + +virtio_main_t virtio_main; + +#define _IOCTL(fd,a,...) 
\ + if (ioctl (fd, a, __VA_ARGS__) < 0) \ + { \ + err = clib_error_return_unix (0, "ioctl(" #a ")"); \ + goto error; \ + } + +static clib_error_t * +call_read_ready (clib_file_t * uf) +{ + virtio_main_t *nm = &virtio_main; + vnet_main_t *vnm = vnet_get_main (); + u16 qid = uf->private_data & 0xFFFF; + virtio_if_t *vif = + vec_elt_at_index (nm->interfaces, uf->private_data >> 16); + u64 b; + + CLIB_UNUSED (ssize_t size) = read (uf->file_descriptor, &b, sizeof (b)); + if ((qid & 1) == 0) + vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); + + return 0; +} + + +clib_error_t * +virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) +{ + clib_error_t *err = 0; + virtio_vring_t *vring; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_vring_file file; + clib_file_t t = { 0 }; + int i; + + if (!is_pow2 (sz)) + return clib_error_return (0, "ring size must be power of 2"); + + if (sz > 32768) + return clib_error_return (0, "ring size must be 32768 or lower"); + + if (sz == 0) + sz = 256; + + vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->vrings, idx); + + i = sizeof (struct vring_desc) * sz; + i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); + vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); + memset (vring->desc, 0, i); + + i = sizeof (struct vring_avail) + sz * sizeof (vring->avail->ring[0]); + i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); + vring->avail = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); + memset (vring->avail, 0, i); + // tell kernel that we don't need interrupt + vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; + + i = sizeof (struct vring_used) + sz * sizeof (struct vring_used_elem); + i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); + vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); + memset (vring->used, 0, i); + + ASSERT (vring->buffers == 0); + vec_validate_aligned (vring->buffers, sz * 2, CLIB_CACHE_LINE_BYTES); 
+ + vring->size = sz; + vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC); + vring->kick_fd = eventfd (0, EFD_CLOEXEC); + + t.read_function = call_read_ready; + t.file_descriptor = vring->call_fd; + t.private_data = vif->dev_instance << 16 | idx; + vring->call_file_index = clib_file_add (&file_main, &t); + + state.index = idx; + state.num = sz; + _IOCTL (vif->fd, VHOST_SET_VRING_NUM, &state); + + addr.index = idx; + addr.flags = 0; + addr.desc_user_addr = pointer_to_uword (vring->desc); + addr.avail_user_addr = pointer_to_uword (vring->avail); + addr.used_user_addr = pointer_to_uword (vring->used); + _IOCTL (vif->fd, VHOST_SET_VRING_ADDR, &addr); + + file.index = idx; + file.fd = vring->kick_fd; + _IOCTL (vif->fd, VHOST_SET_VRING_KICK, &file); + file.fd = vring->call_fd; + _IOCTL (vif->fd, VHOST_SET_VRING_CALL, &file); + file.fd = vif->tap_fd; + _IOCTL (vif->fd, VHOST_NET_SET_BACKEND, &file); + +error: + return err; +} + +clib_error_t * +virtio_vring_free (virtio_if_t * vif, u32 idx) +{ + //TODO free buffers and indirect descriptor allocs + virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx); + if (vring->desc) + clib_mem_free (vring->desc); + if (vring->avail) + clib_mem_free (vring->avail); + if (vring->used) + clib_mem_free (vring->used); + clib_file_del_by_index (&file_main, vring->call_file_index); + close (vring->kick_fd); + close (vring->call_fd); + vec_free (vring->buffers); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h new file mode 100644 index 00000000000..7dcd90ac353 --- /dev/null +++ b/src/vnet/devices/virtio/virtio.h @@ -0,0 +1,131 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_H_ +#define _VNET_DEVICES_VIRTIO_VIRTIO_H_ + +#define foreach_virtio_net_features \ + _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \ + _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \ + _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \ + _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \ + _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \ + _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \ + _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. 
*/ \ + _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \ + _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \ + _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \ + _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \ + _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \ + _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) /* Guest can announce device on the network */ \ + _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \ + _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \ + _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \ + _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \ + _ (VIRTIO_F_ANY_LAYOUT, 27) /* Can the device handle any descriptor layout */ \ + _ (VIRTIO_RING_F_INDIRECT_DESC, 28) /* Support indirect buffer descriptors */ \ + _ (VIRTIO_RING_F_EVENT_IDX, 29) /* The Guest publishes the used index for which it expects an interrupt \ + * at the end of the avail ring. Host should ignore the avail->flags field. */ \ +/* The Host publishes the avail index for which it expects a kick \ + * at the end of the used ring. Guest should ignore the used->flags field. 
*/ \ + _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \ + _ (VIRTIO_F_VERSION_1, 32) + +#define foreach_virtio_if_flag \ + _(0, ADMIN_UP, "admin-up") \ + _(1, DELETING, "deleting") + +typedef enum +{ +#define _(a, b, c) VIRTIO_IF_FLAG_##b = (1 << a), + foreach_virtio_if_flag +#undef _ +} virtio_if_flag_t; + +typedef enum +{ + VIRTIO_IF_TYPE_TAP, + VIRTIO_IF_N_TYPES, +} virtio_if_type_t; + + +typedef struct +{ + struct vring_desc *desc; + struct vring_used *used; + struct vring_avail *avail; + u16 desc_in_use; + u16 desc_next; + int kick_fd; + int call_fd; + u16 size; +#define VIRTIO_RING_FLAG_MASK_INT 1 + u32 flags; + u32 call_file_index; + u32 *buffers; + u16 last_used_idx; +} virtio_vring_t; + +typedef struct +{ + u32 flags; + u32 dev_instance; + u32 hw_if_index; + u32 sw_if_index; + u32 per_interface_next_index; + int fd; + int tap_fd; + virtio_vring_t *vrings; + + u64 features, remote_features; + + virtio_if_type_t type; + u8 *name; + u8 *net_ns; + int ifindex; +} virtio_if_t; + +typedef struct +{ + virtio_if_t *interfaces; +} virtio_main_t; + +extern virtio_main_t virtio_main; +extern vnet_device_class_t virtio_device_class; +extern vlib_node_registration_t virtio_input_node; + +clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, + u16 sz); +clib_error_t *virtio_vring_free (virtio_if_t * vif, u32 idx); + +#endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |