diff options
author | Mohsin Kazmi <sykazmi@cisco.com> | 2018-10-23 18:00:47 +0200 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2019-01-21 23:13:20 +0100 |
commit | d6c15af33f3f153e084f14484e884f3ca68dbc23 (patch) | |
tree | 97d7d92f4fb57067f0c24900a5363448df92f8b0 /src/vnet/devices | |
parent | 8b5d0b8c3a089e88a8872af939302029eb3434d9 (diff) |
virtio: Native virtio driver
Change-Id: Id7fccf2f805e578fb05032aeb2b649a74c3c0e56
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
Diffstat (limited to 'src/vnet/devices')
-rw-r--r-- | src/vnet/devices/tap/cli.c | 100 | ||||
-rw-r--r-- | src/vnet/devices/tap/tap.c | 26 | ||||
-rw-r--r-- | src/vnet/devices/virtio/cli.c | 208 | ||||
-rw-r--r-- | src/vnet/devices/virtio/device.c | 136 | ||||
-rw-r--r-- | src/vnet/devices/virtio/node.c | 37 | ||||
-rw-r--r-- | src/vnet/devices/virtio/pci.c | 919 | ||||
-rw-r--r-- | src/vnet/devices/virtio/pci.h | 233 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.api | 96 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.c | 165 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio.h | 85 | ||||
-rw-r--r-- | src/vnet/devices/virtio/virtio_api.c | 237 |
11 files changed, 2069 insertions, 173 deletions
diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c index 9d86159c574..ee57a72268e 100644 --- a/src/vnet/devices/tap/cli.c +++ b/src/vnet/devices/tap/cli.c @@ -172,29 +172,6 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, int show_descr = 0; clib_error_t *error = 0; u32 hw_if_index, *hw_if_indices = 0; - virtio_vring_t *vring; - int i, j; - struct feat_struct - { - u8 bit; - char *str; - }; - struct feat_struct *feat_entry; - - static struct feat_struct feat_array[] = { -#define _(s,b) { .str = #s, .bit = b, }, - foreach_virtio_net_features -#undef _ - {.str = NULL} - }; - - struct feat_struct *flag_entry; - static struct feat_struct flags_array[] = { -#define _(b,e,s) { .bit = b, .str = s, }, - foreach_virtio_if_flag -#undef _ - {.str = NULL} - }; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -220,81 +197,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-ON* */ } - for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) - { - vnet_hw_interface_t *hi = - vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); - vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); - vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, - vnm, vif->sw_if_index); - if (vif->host_if_name) - vlib_cli_output (vm, " name \"%s\"", vif->host_if_name); - if (vif->net_ns) - vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); - vlib_cli_output (vm, " flags 0x%x", vif->flags); - flag_entry = (struct feat_struct *) &flags_array; - while (flag_entry->str) - { - if (vif->flags & (1ULL << flag_entry->bit)) - vlib_cli_output (vm, " %s (%d)", flag_entry->str, - flag_entry->bit); - flag_entry++; - } - vlib_cli_output (vm, " fd %d", vif->fd); - vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); - vlib_cli_output (vm, " features 0x%lx", vif->features); - feat_entry = (struct feat_struct *) &feat_array; - while (feat_entry->str) - { - if (vif->features & (1ULL << 
feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); - feat_entry = (struct feat_struct *) &feat_array; - while (feat_entry->str) - { - if (vif->remote_features & (1ULL << feat_entry->bit)) - vlib_cli_output (vm, " %s (%d)", feat_entry->str, - feat_entry->bit); - feat_entry++; - } - vec_foreach_index (i, vif->vrings) - { - // RX = 0, TX = 1 - vring = vec_elt_at_index (vif->vrings, i); - vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX"); - vlib_cli_output (vm, - " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", - vring->size, vring->last_used_idx, vring->desc_next, - vring->desc_in_use); - vlib_cli_output (vm, - " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", - vring->avail->flags, vring->avail->idx, - vring->used->flags, vring->used->idx); - vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, - vring->call_fd); - if (show_descr) - { - vlib_cli_output (vm, "\n descriptor table:\n"); - vlib_cli_output (vm, - " id addr len flags next user_addr\n"); - vlib_cli_output (vm, - " ===== ================== ===== ====== ===== ==================\n"); - vring = vif->vrings; - for (j = 0; j < vring->size; j++) - { - struct vring_desc *desc = &vring->desc[j]; - vlib_cli_output (vm, - " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", - j, desc->addr, - desc->len, - desc->flags, desc->next, desc->addr); - } - } - } - } + virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP); + done: vec_free (hw_if_indices); return error; diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c index d0ed58c1f06..4f3066a6916 100644 --- a/src/vnet/devices/tap/tap.c +++ b/src/vnet/devices/tap/tap.c @@ -130,7 +130,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) _IOCTL (vif->fd, VHOST_GET_FEATURES, &vif->remote_features); - if ((vif->remote_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) == 0) + 
if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)) == 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -138,7 +138,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - if ((vif->remote_features & (1ULL << VIRTIO_RING_F_INDIRECT_DESC)) == 0) + if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) == + 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -146,7 +147,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - if ((vif->remote_features & (1ULL << VIRTIO_F_VERSION_1)) == 0) + if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) == 0) { args->rv = VNET_API_ERROR_UNSUPPORTED; args->error = clib_error_return (0, "vhost-net backend doesn't support " @@ -154,9 +155,11 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) goto error; } - vif->features |= 1ULL << VIRTIO_NET_F_MRG_RXBUF; - vif->features |= 1ULL << VIRTIO_F_VERSION_1; - vif->features |= 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + vif->features |= VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF); + vif->features |= VIRTIO_FEATURE (VIRTIO_F_VERSION_1); + vif->features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); + + virtio_set_net_hdr_size (vif); _IOCTL (vif->fd, VHOST_SET_FEATURES, &vif->features); @@ -349,6 +352,8 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) } vif->rx_ring_sz = args->rx_ring_sz != 0 ? args->rx_ring_sz : 256; vif->tx_ring_sz = args->tx_ring_sz != 0 ? 
args->tx_ring_sz : 256; + clib_memcpy (vif->mac_addr, args->mac_addr, 6); + vif->host_if_name = args->host_if_name; args->host_if_name = 0; vif->net_ns = args->host_namespace; @@ -363,9 +368,10 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) if (args->host_ip6_prefix_len) clib_memcpy (&vif->host_ip6_addr, &args->host_ip6_addr, 16); + vif->type = VIRTIO_IF_TYPE_TAP; args->error = ethernet_register_interface (vnm, virtio_device_class.index, vif->dev_instance, - args->mac_addr, + vif->mac_addr, &vif->hw_if_index, virtio_eth_flag_change); if (args->error) @@ -386,7 +392,6 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args) vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0, VNET_HW_INTERFACE_RX_MODE_DEFAULT); vif->per_interface_next_index = ~0; - vif->type = VIRTIO_IF_TYPE_TAP; vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -433,6 +438,9 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index) vif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + if (vif->type != VIRTIO_IF_TYPE_TAP) + return VNET_API_ERROR_INVALID_INTERFACE; + /* bring down the interface */ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0); @@ -469,6 +477,8 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids) /* *INDENT-OFF* */ pool_foreach (vif, mm->interfaces, + if (vif->type != VIRTIO_IF_TYPE_TAP) + continue; vec_add2(r_tapids, tapid, 1); clib_memset (tapid, 0, sizeof (*tapid)); tapid->id = vif->id; diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c new file mode 100644 index 00000000000..82dc5a117b8 --- /dev/null +++ b/src/vnet/devices/virtio/cli.c @@ -0,0 +1,208 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2018 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <inttypes.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/pci/pci.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/pci.h> + +static clib_error_t * +virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + virtio_pci_create_if_args_t args; + u32 tmp; + u64 feature_mask = (u64) ~ (0ULL); + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + memset (&args, 0, sizeof (args)); + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr)) + ; + else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask)) + args.features = feature_mask; + else if (unformat (line_input, "rx-queue-size %u", &tmp)) + args.rxq_size = tmp; + else if (unformat (line_input, "tx-queue-size %u", &tmp)) + args.txq_size = tmp; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + virtio_pci_create_if (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (virtio_pci_create_command, static) = { + .path = "create interface virtio", + .short_help = "create interface virtio <pci-address>" + "[feature-mask <hex-mask>] [rx-queue-size <size>] [tx-queue-size <size>]", + .function = virtio_pci_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_hw_interface_t *hw; + virtio_main_t *vmxm = &virtio_main; + virtio_if_t *vif; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || virtio_device_class.index != hw->dev_class_index) + return clib_error_return (0, "not a virtio interface"); + + vif = pool_elt_at_index (vmxm->interfaces, hw->dev_instance); + + if (virtio_pci_delete_if (vm, vif) < 0) + return clib_error_return (0, "not a virtio pci interface"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = { + .path = "delete interface virtio", + .short_help = "delete interface virtio" + "{<interface> | sw_if_index <sw_idx>}", + .function = virtio_pci_delete_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + virtio_main_t *vmxm = &virtio_main; + vnet_main_t *vnm = &vnet_main; + virtio_if_t *vif; + clib_error_t *error = 0; + u32 hw_if_index, *hw_if_indices = 0; + vnet_hw_interface_t *hi; + u8 show_descr = 0, show_device_config = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + { + hi = vnet_get_hw_interface (vnm, hw_if_index); + if (virtio_device_class.index != hi->dev_class_index) + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + vec_add1 (hw_if_indices, hw_if_index); + } + else if (unformat (input, "descriptors") || 
unformat (input, "desc")) + show_descr = 1; + else if (unformat (input, "debug-device")) + show_device_config = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (vec_len (hw_if_indices) == 0) + { + pool_foreach (vif, vmxm->interfaces, + vec_add1 (hw_if_indices, vif->hw_if_index); + ); + } + else if (show_device_config) + { + vif = pool_elt_at_index (vmxm->interfaces, hi->dev_instance); + if (vif->type == VIRTIO_IF_TYPE_PCI) + debug_device_config_space (vm, vif); + } + + virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_PCI); + +done: + vec_free (hw_if_indices); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_virtio_pci_command, static) = { + .path = "show virtio pci", + .short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]", + .function = show_virtio_pci_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +virtio_pci_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (virtio_pci_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c index 7c66a60cf99..4eeb2caf8a1 100644 --- a/src/vnet/devices/virtio/device.c +++ b/src/vnet/devices/virtio/device.c @@ -36,10 +36,10 @@ _(NO_TX_QUEUES, "no tx queues") typedef enum { -#define _(f,s) TAP_TX_ERROR_##f, +#define _(f,s) VIRTIO_TX_ERROR_##f, foreach_virtio_tx_func_error #undef _ - TAP_TX_N_ERROR, + VIRTIO_TX_N_ERROR, } virtio_tx_func_error_t; static char *virtio_tx_func_error_strings[] = { @@ -56,11 +56,13 @@ format_virtio_device_name (u8 * s, va_list * args) virtio_if_t *vif = pool_elt_at_index (mm->interfaces, dev_instance); if (vif->type == VIRTIO_IF_TYPE_TAP) - { - s = format (s, "tap%u", vif->id); - } + s = format (s, "tap%u", vif->id); + else if (vif->type == VIRTIO_IF_TYPE_PCI) + s = format (s, "virtio-%x/%x/%x/%x", 
vif->pci_addr.domain, + vif->pci_addr.bus, vif->pci_addr.slot, + vif->pci_addr.function); else - s = format (s, "virtio%lu", vif->dev_instance); + s = format (s, "virtio-%lu", vif->dev_instance); return s; } @@ -104,13 +106,6 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) { struct vring_used_elem *e = &vring->used->ring[last & mask]; u16 slot = e->id; - struct vring_desc *d = &vring->desc[slot]; - - if (PREDICT_FALSE (d->flags & VRING_DESC_F_INDIRECT)) - { - d = uword_to_pointer (d->addr, struct vring_desc *); - vec_free (d); - } vlib_buffer_free (vm, &vring->buffers[slot], 1); used--; @@ -122,11 +117,12 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring) } static_always_inline u16 -add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, - u16 avail, u16 next, u16 mask) +add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif, + virtio_vring_t * vring, u32 bi, u16 avail, u16 next, + u16 mask) { u16 n_added = 0; - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + int hdr_sz = vif->virtio_net_hdr_sz; struct vring_desc *d; d = &vring->desc[next]; vlib_buffer_t *b = vlib_get_buffer (vm, bi); @@ -136,31 +132,85 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi, if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) { - d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + d->addr = + ((vif->type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz; d->len = b->current_length + hdr_sz; d->flags = 0; } else { - struct vring_desc *id, *descs = 0; - - /* first buffer in chain */ - vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); - id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; - id->len = b->current_length + hdr_sz; - - while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + /* + * We are using single vlib_buffer_t for indirect descriptor(s) + * chain. 
Single descriptor is 16 bytes and vlib_buffer_t + * has 2048 bytes space. So maximum long chain can have 128 + * (=2048/16) indirect descriptors. + * It can easily support 65535 bytes of Jumbo frames with + * each data buffer size of 512 bytes minimum. + */ + vlib_buffer_t *indirect_desc = + vlib_get_buffer (vm, vring->indirect_buffers[next]); + indirect_desc->current_data = 0; + + struct vring_desc *id = + (struct vring_desc *) vlib_buffer_get_current (indirect_desc); + u32 count = 1; + if (vif->type == VIRTIO_IF_TYPE_PCI) { - id->flags = VRING_DESC_F_NEXT; - id->next = vec_len (descs); - vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES); - b = vlib_get_buffer (vm, b->next_buffer); - id->addr = pointer_to_uword (vlib_buffer_get_current (b)); - id->len = b->current_length; + d->addr = vlib_physmem_get_pa (vm, id); + id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz; + + /* + * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr + * should be presented in separate descriptor and data will start + * from next descriptor. 
+ */ + if (PREDICT_TRUE + (vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT))) + id->len = b->current_length + hdr_sz; + else + { + id->len = hdr_sz; + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = vlib_buffer_get_current_pa (vm, b); + id->len = b->current_length; + } } - - d->addr = pointer_to_uword (descs); - d->len = vec_len (descs) * sizeof (struct vring_desc); + else /* VIRTIO_IF_TYPE_TAP */ + { + d->addr = pointer_to_uword (id); + /* first buffer in chain */ + id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + id->len = b->current_length + hdr_sz; + + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + id->flags = VRING_DESC_F_NEXT; + id->next = count; + count++; + id++; + b = vlib_get_buffer (vm, b->next_buffer); + id->addr = pointer_to_uword (vlib_buffer_get_current (b)); + id->len = b->current_length; + } + } + id->flags = 0; + id->next = 0; + d->len = count * sizeof (struct vring_desc); d->flags = VRING_DESC_F_INDIRECT; } vring->buffers[next] = bi; @@ -184,8 +234,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, clib_spinlock_lock_if_init (&vif->lockp); if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && - vring->last_kick_avail_idx != vring->avail->idx) - virtio_kick (vring); + (vring->last_kick_avail_idx != vring->avail->idx)) + virtio_kick (vm, vring, vif); /* free consumed buffers */ virtio_free_used_desc (vm, vring); @@ -196,8 +246,11 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left && used < sz) { - u16 n_added; - n_added = add_buffer_to_slot (vm, vring, buffers[0], avail, next, mask); + u16 n_added = 0; + n_added = + add_buffer_to_slot (vm, vif, vring, buffers[0], 
avail, next, mask); + if (!n_added) + break; avail += n_added; next = (next + n_added) & mask; used += n_added; @@ -212,13 +265,12 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vring->desc_next = next; vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - virtio_kick (vring); + virtio_kick (vm, vring, vif); } - if (n_left) { - vlib_error_count (vm, node->node_index, TAP_TX_ERROR_NO_FREE_SLOTS, + vlib_error_count (vm, node->node_index, VIRTIO_TX_ERROR_NO_FREE_SLOTS, n_left); vlib_buffer_free (vm, buffers, n_left); } @@ -313,7 +365,7 @@ VNET_DEVICE_CLASS (virtio_device_class) = { .format_device_name = format_virtio_device_name, .format_device = format_virtio_device, .format_tx_trace = format_virtio_tx_trace, - .tx_function_n_errors = TAP_TX_N_ERROR, + .tx_function_n_errors = VIRTIO_TX_N_ERROR, .tx_function_error_strings = virtio_tx_func_error_strings, .rx_redirect_to_node = virtio_set_interface_next_node, .clear_counters = virtio_clear_hw_interface_counters, diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index d7a0b3964b4..c02b607d5fe 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -80,9 +80,9 @@ format_virtio_input_trace (u8 * s, va_list * args) } static_always_inline void -virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring) +virtio_refill_vring (vlib_main_t * vm, virtio_if_t * vif, + virtio_vring_t * vring, const int hdr_sz) { - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); u16 used, next, avail, n_slots; u16 sz = vring->size; u16 mask = sz - 1; @@ -108,7 +108,18 @@ more: { struct vring_desc *d = &vring->desc[next];; vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]); - d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz; + /* + * current_data may not be initialized with 0 and may contain + * previous offset. Here we want to make sure, it should be 0 + * initialized. 
+ */ + b->current_data = 0; + b->current_data -= hdr_sz; + memset (vlib_buffer_get_current (b), 0, hdr_sz); + d->addr = + ((vif->type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm, + b) : + pointer_to_uword (vlib_buffer_get_current (b))); d->len = VLIB_BUFFER_DATA_SIZE + hdr_sz; d->flags = VRING_DESC_F_WRITE; vring->avail->ring[avail & mask] = next; @@ -123,7 +134,9 @@ more: vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) - virtio_kick (vring); + { + virtio_kick (vm, vring, vif); + } goto more; } @@ -136,7 +149,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_trace = vlib_get_trace_count (vm, node); virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0); u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - const int hdr_sz = sizeof (struct virtio_net_hdr_v1); + const int hdr_sz = vif->virtio_net_hdr_sz; u32 *to_next = 0; u32 n_rx_packets = 0; u32 n_rx_bytes = 0; @@ -146,7 +159,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 && vring->last_kick_avail_idx != vring->avail->idx) - virtio_kick (vring); + virtio_kick (vm, vring, vif); if (n_left == 0) goto refill; @@ -159,17 +172,18 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left && n_left_to_next) { - u16 num_buffers; + u16 num_buffers = 1; struct vring_used_elem *e = &vring->used->ring[last & mask]; struct virtio_net_hdr_v1 *hdr; u16 slot = e->id; u16 len = e->len - hdr_sz; u32 bi0 = vring->buffers[slot]; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - hdr = vlib_buffer_get_current (b0) - hdr_sz; - num_buffers = hdr->num_buffers; + hdr = vlib_buffer_get_current (b0); + if (hdr_sz == sizeof (struct virtio_net_hdr_v1)) + num_buffers = hdr->num_buffers; - b0->current_data = 0; + b0->current_data += hdr_sz; b0->current_length = len; b0->total_length_not_including_first_buffer = 0; b0->flags = 
VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -189,7 +203,6 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, cb = vlib_get_buffer (vm, cbi); /* current buffer */ - cb->current_data = -hdr_sz; cb->current_length = e->len; /* previous buffer */ @@ -253,7 +266,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_rx_bytes); refill: - virtio_refill_vring (vm, vring); + virtio_refill_vring (vm, vif, vring, hdr_sz); return n_rx_packets; } diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c new file mode 100644 index 00000000000..63c8c54fdad --- /dev/null +++ b/src/vnet/devices/virtio/pci.c @@ -0,0 +1,919 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <fcntl.h> +#include <sys/ioctl.h> +#include <linux/virtio_net.h> +#include <linux/virtio_ring.h> +#include <linux/vhost.h> +#include <sys/eventfd.h> +#if defined(__x86_64__) +#include <sys/io.h> +#endif + +#include <vppinfra/types.h> +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vnet/ethernet/ethernet.h> +#include <vpp/app/version.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/pci.h> + +#define PCI_VENDOR_ID_VIRTIO 0x1af4 +#define PCI_DEVICE_ID_VIRTIO_NIC 0x1000 +/* Doesn't support modern device */ +#define PCI_DEVICE_ID_VIRTIO_NIC_MODERN 0x1041 + +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CAP_ID_VNDR 0x09 +#define PCI_CAP_ID_MSIX 0x11 + +#define PCI_MSIX_ENABLE 0x8000 + +static u32 msix_enabled = 0; + +#define PCI_CONFIG_SIZE ((msix_enabled == VIRTIO_MSIX_ENABLED) ? \ + 24 : 20) + +static pci_device_id_t virtio_pci_device_ids[] = { + { + .vendor_id = PCI_VENDOR_ID_VIRTIO, + .device_id = PCI_DEVICE_ID_VIRTIO_NIC}, + { + .vendor_id = PCI_VENDOR_ID_VIRTIO, + .device_id = PCI_DEVICE_ID_VIRTIO_NIC_MODERN}, + {0}, +}; + +static void +virtio_pci_legacy_read_config (vlib_main_t * vm, virtio_if_t * vif, void *dst, + int len, u32 addr) +{ + u32 size = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + while (len > 0) + { + if (len >= 4) + { + size = 4; + vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, dst); + } + else if (len >= 2) + { + size = 2; + vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, dst); + } + else + { + size = 1; + vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, dst); + } + dst = (u8 *) dst + size; + addr += size; + len -= size; + } +} + +static void +virtio_pci_legacy_write_config (vlib_main_t * vm, virtio_if_t * vif, + void *src, int len, u32 addr) +{ + u32 size = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + while (len > 0) + { + if (len >= 4) + { + size = 4; + vlib_pci_write_io_u32 (vm, h, 
PCI_CONFIG_SIZE + addr, src); + } + else if (len >= 2) + { + size = 2; + vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, src); + } + else + { + size = 1; + vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, src); + } + src = (u8 *) src + size; + addr += size; + len -= size; + } +} + +static u64 +virtio_pci_legacy_get_features (vlib_main_t * vm, virtio_if_t * vif) +{ + u32 features; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, + &features); + return features; +} + +static u32 +virtio_pci_legacy_set_features (vlib_main_t * vm, virtio_if_t * vif, + u64 features) +{ + if ((features >> 32) != 0) + { + clib_warning ("only 32 bit features are allowed for legacy virtio!"); + } + u32 feature = 0, guest_features = (u32) features; + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &guest_features); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &feature); + return feature; +} + +static u8 +virtio_pci_legacy_get_status (vlib_main_t * vm, virtio_if_t * vif) +{ + u8 status = 0; + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &status); + return status; +} + +static void +virtio_pci_legacy_set_status (vlib_main_t * vm, virtio_if_t * vif, u8 status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= virtio_pci_legacy_get_status (vm, vif); + vlib_pci_write_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &status); +} + +static u8 +virtio_pci_legacy_reset (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_RESET); + return virtio_pci_legacy_get_status (vm, vif); +} + +static u8 +virtio_pci_legacy_get_isr (vlib_main_t * vm, virtio_if_t * vif) +{ + u8 isr = 0; + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &isr); + return isr; +} + +static u16 +virtio_pci_legacy_get_queue_num (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id) +{ + u16 queue_num = 0; + vlib_pci_write_io_u16 (vm, 
vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM, + &queue_num); + return queue_num; +} + + +static void +virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id, void *p) +{ + u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, + (u32 *) & addr); +} + +static void +virtio_pci_legacy_del_queue (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id) +{ + u32 src = 0; + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, &src); +} + +inline void +virtio_pci_legacy_notify_queue (vlib_main_t * vm, virtio_if_t * vif, + u16 queue_id) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, + &queue_id); +} + +/* Enable one vector (0) for Link State Intrerrupt */ +static u16 +virtio_pci_legacy_set_config_irq (vlib_main_t * vm, virtio_if_t * vif, + u16 vec) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &vec); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, + &vec); + return vec; +} + +static u16 +virtio_pci_legacy_set_queue_irq (vlib_main_t * vm, virtio_if_t * vif, u16 vec, + u16 queue_id) +{ + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &queue_id); + vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &vec); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, + &vec); + return vec; +} + +static u32 +virtio_pci_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, + u32 flags) +{ + return 0; +} + +static clib_error_t * +virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_net_config_t config; + clib_error_t 
*error = 0; + u16 max_queue_pairs = 1; + + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) + { + virtio_pci_legacy_read_config (vm, vif, &config.max_virtqueue_pairs, + sizeof (config.max_virtqueue_pairs), 8); + max_queue_pairs = config.max_virtqueue_pairs; + } + + if (max_queue_pairs < 1 || max_queue_pairs > 0x8000) + clib_error_return (error, "max queue pair is %x", max_queue_pairs); + + vif->max_queue_pairs = max_queue_pairs; + return error; +} + +static void +virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif) +{ + virtio_pci_legacy_write_config (vm, vif, vif->mac_addr, + sizeof (vif->mac_addr), 0); +} + +static u32 +virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif) +{ + if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)) + { + virtio_pci_legacy_read_config (vm, vif, vif->mac_addr, + sizeof (vif->mac_addr), 0); + return 0; + } + return 1; +} + +static u16 +virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif) +{ + /* + * Minimal driver: assumes link is up + */ + u16 status = 1; + if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)) + virtio_pci_legacy_read_config (vm, vif, &status, sizeof (status), /* mac */ + 6); + return status; +} + +static void +virtio_pci_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vmxm = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vmxm->interfaces, pd); + u16 qid = line; + + vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); +} + +static void +virtio_pci_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vmxm = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vmxm->interfaces, pd); + + if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) + { + vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; + 
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + else + { + vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP; + vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); + } +} + +static void +virtio_pci_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h) +{ + virtio_main_t *vmxm = &virtio_main; + uword pd = vlib_pci_get_private_data (vm, h); + virtio_if_t *vif = pool_elt_at_index (vmxm->interfaces, pd); + u8 isr = 0; + u16 line = 0; + + isr = virtio_pci_legacy_get_isr (vm, vif); + + /* + * If the lower bit is set: look through the used rings of + * all virtqueues for the device, to see if any progress has + * been made by the device which requires servicing. + */ + if (isr & VIRTIO_PCI_ISR_INTR) + virtio_pci_irq_0_handler (vm, h, line); + + if (isr & VIRTIO_PCI_ISR_CONFIG) + virtio_pci_irq_1_handler (vm, h, line); +} + +inline void +device_status (vlib_main_t * vm, virtio_if_t * vif) +{ + struct status_struct + { + u8 bit; + char *str; + }; + struct status_struct *status_entry; + static struct status_struct status_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_config_status_flags +#undef _ + {.str = NULL} + }; + + vlib_cli_output (vm, " status 0x%x", vif->status); + + status_entry = (struct status_struct *) &status_array; + while (status_entry->str) + { + if (vif->status & status_entry->bit) + vlib_cli_output (vm, " %s (%x)", status_entry->str, + status_entry->bit); + status_entry++; + } +} + +inline void +debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) +{ + u32 data_u32; + u16 data_u16; + u8 data_u8; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, + &data_u32); + vlib_cli_output (vm, "remote features 0x%lx", data_u32); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, + &data_u32); + vlib_cli_output (vm, "guest features 0x%lx", data_u32); + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, + &data_u32); + 
vlib_cli_output (vm, "queue address 0x%lx", data_u32); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM, + &data_u16); + vlib_cli_output (vm, "queue size 0x%x", data_u16); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, + &data_u16); + vlib_cli_output (vm, "queue select 0x%x", data_u16); + vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, + &data_u16); + vlib_cli_output (vm, "queue notify 0x%x", data_u16); + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &data_u8); + vlib_cli_output (vm, "status 0x%x", data_u8); + vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8); + vlib_cli_output (vm, "isr 0x%x", data_u8); + + u8 mac[6]; + virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0); + vlib_cli_output (vm, "mac %U", format_ethernet_address, mac); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to status */ + 6); + vlib_cli_output (vm, "link up/down status 0x%x", data_u16); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), + /* offset to max_virtqueue */ 8); + vlib_cli_output (vm, "num of virtqueue 0x%x", data_u16); + virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to mtu */ + 10); + vlib_cli_output (vm, "mtu 0x%x", data_u16); + + u32 i = PCI_CONFIG_SIZE + 12, a = 4; + i += a; + i &= ~a; + for (; i < 64; i += 4) + { + u32 data = 0; + vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, i, &data); + vlib_cli_output (vm, "0x%lx", data); + } +} + +static u8 +virtio_pci_queue_size_valid (u16 qsz) +{ + if (qsz < 64 || qsz > 4096) + return 0; + if ((qsz % 64) != 0) + return 0; + return 1; +} + +clib_error_t * +virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx) +{ + clib_error_t *error = 0; + u16 queue_size = 0; + virtio_vring_t *vring; + struct vring vr; + u32 i = 0; + void *ptr; + + queue_size = virtio_pci_legacy_get_queue_num (vm, vif, idx); + if (!virtio_pci_queue_size_valid 
(queue_size)) + clib_warning ("queue size is not valid"); + + if (!is_pow2 (queue_size)) + return clib_error_return (0, "ring size must be power of 2"); + + if (queue_size > 32768) + return clib_error_return (0, "ring size must be 32768 or lower"); + + if (queue_size == 0) + queue_size = 256; + + vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES); + vring = vec_elt_at_index (vif->vrings, idx); + + i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); + i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); + ptr = vlib_physmem_alloc_aligned (vm, i, VIRTIO_PCI_VRING_ALIGN); + memset (ptr, 0, i); + vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); + vring->desc = vr.desc; + vring->avail = vr.avail; + vring->used = vr.used; + vring->queue_id = idx; + vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; + + ASSERT (vring->buffers == 0); + vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES); + ASSERT (vring->indirect_buffers == 0); + vec_validate_aligned (vring->indirect_buffers, queue_size, + CLIB_CACHE_LINE_BYTES); + if (idx % 2) + { + u32 n_alloc = 0; + do + { + if (n_alloc < queue_size) + n_alloc = + vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, + queue_size - n_alloc); + } + while (n_alloc != queue_size); + vif->tx_ring_sz = queue_size; + } + else + vif->rx_ring_sz = queue_size; + vring->size = queue_size; + + virtio_pci_legacy_setup_queue (vm, vif, idx, ptr); + vring->kick_fd = -1; + + return error; +} + +static void +virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, + u64 req_features) +{ + /* + * if features are not requested + * default: all supported features + */ + u64 supported_features = VIRTIO_FEATURE (VIRTIO_NET_F_MTU) + | VIRTIO_FEATURE (VIRTIO_NET_F_MAC) + | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) + | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS) + | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) + | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); + + if (req_features == 0) + { + req_features = supported_features; + } + + 
/*
 * Store the value returned by virtio_pci_legacy_get_features() in
 * vif->remote_features.  Presumably this is the feature set offered by the
 * device/host (the helper is defined outside this view -- confirm there).
 * Feature negotiation masks against this field afterwards, so it has to be
 * called before virtio_negotiate_features().
 */
void
virtio_pci_read_device_feature (vlib_main_t * vm, virtio_if_t * vif)
{
  vif->remote_features = virtio_pci_legacy_get_features (vm, vif);
}
+ ((status & VIRTIO_CONFIG_STATUS_ACK) + && (status & VIRTIO_CONFIG_STATUS_DRIVER))) + return -1; + vif->status = status; + + return 0; +} + +clib_error_t * +virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) +{ + clib_error_t *error = 0; + struct virtio_pci_cap cap; + u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0; + vlib_pci_dev_handle_t h = vif->pci_dev_handle; + + if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos))) + clib_error_return (error, "error in reading capabilty list position"); + + while (pos) + { + if ((error = + vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap, + sizeof (cap)))) + clib_error_return (error, "error in reading the capability at [%2x]", + pos); + + if (cap.cap_vndr == PCI_CAP_ID_MSIX) + { + u16 flags; + if ((error = + vlib_pci_read_write_config (vm, h, VLIB_READ, pos + 2, &flags, + sizeof (flags)))) + clib_error_return (error, + "error in reading the capability at [%2x]", + pos + 2); + + if (flags & PCI_MSIX_ENABLE) + msix_enabled = VIRTIO_MSIX_ENABLED; + else + msix_enabled = VIRTIO_MSIX_DISABLED; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) + { + clib_warning ("[%2x] skipping non VNDR cap id: %2x", pos, + cap.cap_vndr); + goto next; + } + + switch (cap.cfg_type) + { + case VIRTIO_PCI_CAP_COMMON_CFG: + common_cfg = 1; + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + notify_base = 1; + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + dev_cfg = 1; + break; + case VIRTIO_PCI_CAP_ISR_CFG: + isr = 1; + break; + } + next: + pos = cap.cap_next; + } + + if (common_cfg == 0 || notify_base == 0 || dev_cfg == 0 || isr == 0) + { + clib_warning ("no modern virtio pci device found"); + return error; + } + + return clib_error_return (error, "modern virtio pci device found"); +} + +static clib_error_t * +virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, + virtio_pci_create_if_args_t * args) +{ + clib_error_t *error = 0; + u8 status = 0; + + virtio_pci_read_caps (vm, vif); + + if 
(virtio_pci_reset_device (vm, vif) < 0) + clib_error_return (error, "Failed to reset the device"); + + /* + * read device features and negotiate (user) requested features + */ + virtio_pci_read_device_feature (vm, vif); + virtio_negotiate_features (vm, vif, args->features); + + /* + * After FEATURE_OK, driver should not accept new feature bits + */ + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK); + status = virtio_pci_legacy_get_status (vm, vif); + if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK)) + clib_error_return (error, "Device doesn't support requested features"); + + vif->status = status; + + if (virtio_pci_get_mac (vm, vif)) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (vif->mac_addr + 2, &rnd, sizeof (rnd)); + vif->mac_addr[0] = 2; + vif->mac_addr[1] = 0xfe; + virtio_pci_set_mac (vm, vif); + } + + virtio_set_net_hdr_size (vif); + + if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif))) + goto error; + + if ((error = virtio_pci_vring_init (vm, vif, 0))) + goto error; + + if ((error = virtio_pci_vring_init (vm, vif, 1))) + goto error; + + if (msix_enabled == VIRTIO_MSIX_ENABLED) + { + virtio_pci_legacy_set_config_irq (vm, vif, VIRTIO_MSI_NO_VECTOR); + virtio_pci_legacy_set_queue_irq (vm, vif, VIRTIO_MSI_NO_VECTOR, 0); + } + virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK); + vif->status = virtio_pci_legacy_get_status (vm, vif); +error: + return error; +} + +void +virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) +{ + vnet_main_t *vnm = vnet_get_main (); + virtio_main_t *vmxm = &virtio_main; + virtio_if_t *vif; + vlib_pci_dev_handle_t h; + clib_error_t *error = 0; + + if (args->rxq_size == 0) + args->rxq_size = VIRTIO_NUM_RX_DESC; + if (args->txq_size == 0) + args->txq_size = VIRTIO_NUM_TX_DESC; + + if (!virtio_pci_queue_size_valid (args->rxq_size) || + !virtio_pci_queue_size_valid (args->txq_size)) + { + args->rv 
= VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, + "queue size must be <= 4096, >= 64, " + "and multiples of 64"); + return; + } + + /* *INDENT-OFF* */ + pool_foreach (vif, vmxm->interfaces, ({ + if (vif->pci_addr.as_u32 == args->addr) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, "PCI address in use"); + return; + } + })); + /* *INDENT-ON* */ + + pool_get (vmxm->interfaces, vif); + vif->dev_instance = vif - vmxm->interfaces; + vif->per_interface_next_index = ~0; + vif->pci_addr.as_u32 = args->addr; + + if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0) + { + args->rv = VNET_API_ERROR_SYSCALL_ERROR_1; + args->error = clib_error_return_unix (0, "open '/dev/vhost-net'"); + goto error; + } + + if ((error = + vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, + virtio_pci_device_ids, &h))) + { + pool_put (vmxm->interfaces, vif); + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, + &vif->pci_addr); + return; + } + vif->pci_dev_handle = h; + vlib_pci_set_private_data (vm, h, vif->dev_instance); + + if ((error = vlib_pci_bus_master_enable (vm, h))) + goto error; + + if ((error = vlib_pci_io_region (vm, h, 0))) + goto error; + + if ((error = virtio_pci_device_init (vm, vif, args))) + goto error; + + if (msix_enabled == VIRTIO_MSIX_ENABLED) + { + if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1, + &virtio_pci_irq_0_handler))) + goto error; + + if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1, + &virtio_pci_irq_1_handler))) + goto error; + + if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2))) + goto error; + } + else + { + vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler); + } + + if ((error = vlib_pci_intr_enable (vm, h))) + goto error; + + vif->type = VIRTIO_IF_TYPE_PCI; + /* create interface */ + error = ethernet_register_interface (vnm, virtio_device_class.index, + 
/*
 * Tear down a virtio PCI interface and return its pool slot.
 *
 * Returns VNET_API_ERROR_INVALID_INTERFACE, without releasing anything,
 * when vif->type is not VIRTIO_IF_TYPE_PCI; returns 0 otherwise.
 *
 * NOTE(review): the create path also jumps here on failures that happen
 * before vif->type is set to VIRTIO_IF_TYPE_PCI; in those cases the early
 * return below leaves the pool entry and PCI handle unreleased -- confirm
 * and fix in the caller.
 */
int
virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
{
  vnet_main_t *vnm = vnet_get_main ();
  virtio_main_t *vmxm = &virtio_main;
  u32 i = 0;

  if (vif->type != VIRTIO_IF_TYPE_PCI)
    return VNET_API_ERROR_INVALID_INTERFACE;

  /* quiesce the device first: no interrupts, no queues, then reset */
  vlib_pci_intr_disable (vm, vif->pci_dev_handle);

  virtio_pci_legacy_del_queue (vm, vif, 0);
  virtio_pci_legacy_del_queue (vm, vif, 1);

  virtio_pci_legacy_reset (vm, vif);

  /*
   * NOTE(review): hw_if_index 0 is treated as "never registered"; this
   * relies on the pool element having been zeroed and on index 0 never
   * being assigned to a virtio interface -- confirm.
   */
  if (vif->hw_if_index)
    {
      vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
      vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0);
      ethernet_delete_interface (vnm, vif->hw_if_index);
    }

  vlib_pci_device_close (vm, vif->pci_dev_handle);

  vec_foreach_index (i, vif->vrings)
  {
    virtio_vring_t *vring = vec_elt_at_index (vif->vrings, i);
    /* PCI rings set kick_fd to -1 in virtio_pci_vring_init() */
    if (vring->kick_fd != -1)
      close (vring->kick_fd);
    if (vring->used)
      {
	/* odd index: TX ring, even index: RX ring */
	if ((i & 1) == 1)
	  virtio_free_used_desc (vm, vring);
	else
	  virtio_free_rx_buffers (vm, vring);
      }
    /* TX rings own the buffers pre-allocated into indirect_buffers */
    if (vring->queue_id % 2)
      {
	vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size);
      }
    vec_free (vring->buffers);
    vec_free (vring->indirect_buffers);
    /* desc is the start of the physmem block allocated for the ring */
    vlib_physmem_free (vm, vring->desc);
  }

  vec_free (vif->vrings);

  if (vif->fd != -1)
    close (vif->fd);
  /*
   * tap_fd shares a union with pci_dev_handle in virtio_if_t, so this
   * store overwrites the PCI handle; harmless because the whole structure
   * is zeroed right below.
   */
  if (vif->tap_fd != -1)
    vif->tap_fd = -1;
  clib_error_free (vif->error);
  /* zero the slot so a later pool_get() starts from a clean element */
  memset (vif, 0, sizeof (*vif));
  pool_put (vmxm->interfaces, vif);

  return 0;
}
+ */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO) */ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* + * define in include/linux/virtio_pci.h + * #define VIRTIO_MSI_NO_VECTOR 0xFFFF + */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 + +/* Status byte for guest to report progress. 
*/ +#define foreach_virtio_config_status_flags \ + _ (VIRTIO_CONFIG_STATUS_RESET, 0x00) \ + _ (VIRTIO_CONFIG_STATUS_ACK, 0x01) \ + _ (VIRTIO_CONFIG_STATUS_DRIVER, 0x02) \ + _ (VIRTIO_CONFIG_STATUS_DRIVER_OK, 0x04) \ + _ (VIRTIO_CONFIG_STATUS_FEATURES_OK, 0x08) \ + _ (VIRTIO_CONFIG_STATUS_DEVICE_NEEDS_RESET, 0x40) \ + _ (VIRTIO_CONFIG_STATUS_FAILED, 0x80) + +typedef enum +{ +#define _(a, b) a = b, + foreach_virtio_config_status_flags +#undef _ +} virtio_config_status_flags_t; + +#define foreach_virtio_net_feature_flags \ + _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \ + _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \ + _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \ + _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \ + _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \ + _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \ + _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \ + _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \ + _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \ + _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \ + _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. 
*/ \ + _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \ + _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \ + _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \ + _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \ + _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \ + _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) /* Guest can announce device on the network */ \ + _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \ + _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \ + _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \ + _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \ + _ (VIRTIO_F_ANY_LAYOUT, 27) /* Can the device handle any descripor layout */ \ + _ (VIRTIO_RING_F_INDIRECT_DESC, 28) /* Support indirect buffer descriptors */ \ + _ (VIRTIO_RING_F_EVENT_IDX, 29) /* The Guest publishes the used index for which it expects an interrupt \ + * at the end of the avail ring. Host should ignore the avail->flags field. */ \ +/* The Host publishes the avail index for which it expects a kick \ + * at the end of the used ring. Guest should ignore the used->flags field. 
*/ \ + _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) + +#define VIRTIO_NET_F_MTU 3 +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR Status */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +#define VIRTIO_PCI_VRING_ALIGN 4096 + +typedef enum +{ + VIRTIO_MSIX_NONE = 0, + VIRTIO_MSIX_DISABLED = 1, + VIRTIO_MSIX_ENABLED = 2 +} virtio_msix_status_t; + +/* This is the PCI capability header: */ +typedef struct +{ + u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + u8 cap_next; /* Generic PCI field: next ptr. */ + u8 cap_len; /* Generic PCI field: capability length */ + u8 cfg_type; /* Identifies the structure. */ + u8 bar; /* Where to find it. */ + u8 padding[3]; /* Pad to full dword. */ + u32 offset; /* Offset within bar. */ + u32 length; /* Length of the structure, in bytes. */ +} virtio_pci_cap_t; + +typedef struct +{ + struct virtio_pci_cap cap; + u32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ +} virtio_pci_notify_cap_t; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +typedef struct +{ + /* About the whole device. */ + u32 device_feature_select; /* read-write */ + u32 device_feature; /* read-only */ + u32 guest_feature_select; /* read-write */ + u32 guest_feature; /* read-write */ + u16 msix_config; /* read-write */ + u16 num_queues; /* read-only */ + u8 device_status; /* read-write */ + u8 config_generation; /* read-only */ + + /* About a specific virtqueue. */ + u16 queue_select; /* read-write */ + u16 queue_size; /* read-write, power of 2. 
*/ + u16 queue_msix_vector; /* read-write */ + u16 queue_enable; /* read-write */ + u16 queue_notify_off; /* read-only */ + u32 queue_desc_lo; /* read-write */ + u32 queue_desc_hi; /* read-write */ + u32 queue_avail_lo; /* read-write */ + u32 queue_avail_hi; /* read-write */ + u32 queue_used_lo; /* read-write */ + u32 queue_used_hi; /* read-write */ +} virtio_pci_common_cfg_t; + +typedef struct +{ + u64 addr; + u32 len; + u16 flags; + u16 next; +} vring_desc_t; + +typedef struct +{ + u16 flags; + u16 idx; + u16 ring[0]; + /* u16 used_event; */ +} vring_avail_t; + +typedef struct +{ + u32 id; + u32 len; +} vring_used_elem_t; + +typedef struct +{ + u16 flags; + u16 idx; + vring_used_elem_t ring[0]; + /* u16 avail_event; */ +} vring_used_t; + +typedef struct +{ + u32 addr; + u16 rxq_size; + u16 txq_size; + /* return */ + i32 rv; + u32 sw_if_index; + u8 mac_addr_set; + u8 mac_addr[6]; + u64 features; + clib_error_t *error; +} virtio_pci_create_if_args_t; + +extern void debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif); +extern void device_status (vlib_main_t * vm, virtio_if_t * vif); +void virtio_pci_create_if (vlib_main_t * vm, + virtio_pci_create_if_args_t * args); +int virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * ad); + +#endif /* __included_virtio_pci_h__ */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api new file mode 100644 index 00000000000..cb672960afd --- /dev/null +++ b/src/vnet/devices/virtio/virtio.api @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Initialize a new virtio pci interface with the given paramters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param pci_addr - pci address as unsigned 32bit integer: + 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function + @param use_random_mac - let the system generate a unique mac address + @param mac_address - mac addr to assign to the interface if use_radom not set + @param tx_ring_sz - the number of entries of TX ring + @param rx_ring_sz - the number of entries of RX ring + @param features - the virtio features which driver should negotiate with device +*/ +define virtio_pci_create +{ + u32 client_index; + u32 context; + u32 pci_addr; + u8 use_random_mac; + u8 mac_address[6]; + u16 tx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ + u16 rx_ring_sz; /* optional, default is 256 entries, must be power of 2 */ + u64 features; +}; + +/** \brief Reply for virtio pci create reply + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new virtio pci interface +*/ +define virtio_pci_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete virtio pci interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface index of existing virtio pci interface +*/ +autoreply define 
virtio_pci_delete +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Dump virtio pci interfaces request */ +define sw_interface_virtio_pci_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for virtio pci interface dump request + @param sw_if_index - software index of virtio pci interface + @param pci_addr - pci address as unsigned 32bit integer: + 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function + @param mac_addr - native virtio device mac address + @param tx_ring_sz - the number of entries of TX ring + @param rx_ring_sz - the number of entries of RX ring + @param features - the virtio features which driver have negotiated with device +*/ +define sw_interface_virtio_pci_details +{ + u32 context; + u32 sw_if_index; + u32 pci_addr; + u8 mac_addr[6]; + u16 tx_ring_sz; + u16 rx_ring_sz; + u64 features; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c index 17de781921d..94f140dac97 100644 --- a/src/vnet/devices/virtio/virtio.c +++ b/src/vnet/devices/virtio/virtio.c @@ -26,11 +26,13 @@ #include <sys/eventfd.h> #include <vlib/vlib.h> +#include <vlib/pci/pci.h> #include <vlib/unix/unix.h> #include <vnet/ethernet/ethernet.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> #include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/pci.h> virtio_main_t virtio_main; @@ -101,6 +103,20 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) ASSERT (vring->buffers == 0); vec_validate_aligned (vring->buffers, sz, CLIB_CACHE_LINE_BYTES); + ASSERT (vring->indirect_buffers == 0); + vec_validate_aligned (vring->indirect_buffers, sz, CLIB_CACHE_LINE_BYTES); + if (idx % 2) + { + u32 n_alloc = 0; + do + { + if (n_alloc < sz) + n_alloc = + vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, + sz - n_alloc); + } + while (n_alloc != sz); + } vring->size = sz; vring->call_fd = eventfd (0, 
EFD_NONBLOCK | EFD_CLOEXEC); @@ -136,7 +152,7 @@ error: return err; } -static_always_inline void +inline void virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring) { u16 used = vring->desc_in_use; @@ -171,10 +187,157 @@ virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx) clib_mem_free (vring->desc); if (vring->avail) clib_mem_free (vring->avail); + if (vring->queue_id % 2) + { + vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size); + } vec_free (vring->buffers); + vec_free (vring->indirect_buffers); return 0; } +inline void +virtio_set_net_hdr_size (virtio_if_t * vif) +{ + if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) || + vif->features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) + vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr_v1); + else + vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr); +} + +inline void +virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) +{ + u32 i, j, hw_if_index; + virtio_if_t *vif; + vnet_main_t *vnm = &vnet_main; + virtio_main_t *mm = &virtio_main; + virtio_vring_t *vring; + struct feat_struct + { + u8 bit; + char *str; + }; + struct feat_struct *feat_entry; + + static struct feat_struct feat_array[] = { +#define _(s,b) { .str = #s, .bit = b, }, + foreach_virtio_net_features +#undef _ + {.str = NULL} + }; + + struct feat_struct *flag_entry; + static struct feat_struct flags_array[] = { +#define _(b,e,s) { .bit = b, .str = s, }, + foreach_virtio_if_flag +#undef _ + {.str = NULL} + }; + + if (!hw_if_indices) + return; + + for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) + { + vnet_hw_interface_t *hi = + vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); + vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + if (vif->type != type) + continue; + vlib_cli_output (vm, "Interface: %U (ifindex %d)", + format_vnet_hw_if_index_name, vnm, + hw_if_indices[hw_if_index], vif->hw_if_index); + if (type == VIRTIO_IF_TYPE_PCI) 
+ { + vlib_cli_output (vm, " PCI Address: %U", format_vlib_pci_addr, + &vif->pci_addr); + } + if (type == VIRTIO_IF_TYPE_TAP) + { + if (vif->host_if_name) + vlib_cli_output (vm, " name \"%s\"", vif->host_if_name); + if (vif->net_ns) + vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); + vlib_cli_output (vm, " fd %d", vif->fd); + vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); + } + vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address, + vif->mac_addr); + vlib_cli_output (vm, " Device instance: %u", vif->dev_instance); + vlib_cli_output (vm, " flags 0x%x", vif->flags); + flag_entry = (struct feat_struct *) &flags_array; + while (flag_entry->str) + { + if (vif->flags & (1ULL << flag_entry->bit)) + vlib_cli_output (vm, " %s (%d)", flag_entry->str, + flag_entry->bit); + flag_entry++; + } + if (type == VIRTIO_IF_TYPE_PCI) + { + device_status (vm, vif); + } + vlib_cli_output (vm, " features 0x%lx", vif->features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); + feat_entry = (struct feat_struct *) &feat_array; + while (feat_entry->str) + { + if (vif->remote_features & (1ULL << feat_entry->bit)) + vlib_cli_output (vm, " %s (%d)", feat_entry->str, + feat_entry->bit); + feat_entry++; + } + vec_foreach_index (i, vif->vrings) + { + // RX = 0, TX = 1 + vring = vec_elt_at_index (vif->vrings, i); + vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? 
"TX" : "RX"); + vlib_cli_output (vm, + " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", + vring->size, vring->last_used_idx, vring->desc_next, + vring->desc_in_use); + vlib_cli_output (vm, + " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", + vring->avail->flags, vring->avail->idx, + vring->used->flags, vring->used->idx); + if (type == VIRTIO_IF_TYPE_TAP) + { + vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, + vring->call_fd); + } + if (show_descr) + { + vlib_cli_output (vm, "\n descriptor table:\n"); + vlib_cli_output (vm, + " id addr len flags next user_addr\n"); + vlib_cli_output (vm, + " ===== ================== ===== ====== ===== ==================\n"); + vring = vif->vrings; + for (j = 0; j < vring->size; j++) + { + struct vring_desc *desc = &vring->desc[j]; + vlib_cli_output (vm, + " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", + j, desc->addr, + desc->len, + desc->flags, desc->next, desc->addr); + } + } + } + } + +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h index 841441bd0c6..f4a8103a0ab 100644 --- a/src/vnet/devices/virtio/virtio.h +++ b/src/vnet/devices/virtio/virtio.h @@ -18,6 +18,11 @@ #ifndef _VNET_DEVICES_VIRTIO_VIRTIO_H_ #define _VNET_DEVICES_VIRTIO_VIRTIO_H_ +#include <linux/virtio_config.h> +#include <linux/virtio_net.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_ring.h> + #define foreach_virtio_net_features \ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \ @@ -53,6 +58,7 @@ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \ _ (VIRTIO_F_VERSION_1, 32) + #define foreach_virtio_if_flag \ _(0, ADMIN_UP, "admin-up") \ _(1, DELETING, "deleting") @@ -64,15 +70,31 @@ typedef enum #undef _ } virtio_if_flag_t; +#define VIRTIO_NUM_RX_DESC 256 +#define VIRTIO_NUM_TX_DESC 256 + +#define VIRTIO_FEATURE(X) (1ULL << X) + typedef enum { 
VIRTIO_IF_TYPE_TAP, + VIRTIO_IF_TYPE_PCI, VIRTIO_IF_N_TYPES, } virtio_if_type_t; typedef struct { + u8 mac[6]; + u16 status; + u16 max_virtqueue_pairs; + u16 mtu; +} virtio_net_config_t; + +#define VIRTIO_RING_FLAG_MASK_INT 1 + +typedef struct +{ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); struct vring_desc *desc; struct vring_used *used; @@ -82,33 +104,62 @@ typedef struct int kick_fd; int call_fd; u16 size; -#define VIRTIO_RING_FLAG_MASK_INT 1 - u32 flags; + u16 queue_id; + u16 flags; u32 call_file_index; u32 *buffers; + u32 *indirect_buffers; u16 last_used_idx; u16 last_kick_avail_idx; } virtio_vring_t; +typedef union +{ + struct + { + u16 domain; + u8 bus; + u8 slot:5; + u8 function:3; + }; + u32 as_u32; +} pci_addr_t; + typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 flags; clib_spinlock_t lockp; - u32 id; u32 dev_instance; u32 hw_if_index; u32 sw_if_index; + u16 virtio_net_hdr_sz; + virtio_if_type_t type; + union + { + u32 id; + pci_addr_t pci_addr; + }; u32 per_interface_next_index; int fd; - int tap_fd; + union + { + int tap_fd; + u32 pci_dev_handle; + }; virtio_vring_t *vrings; u64 features, remote_features; - virtio_if_type_t type; + /* error */ + clib_error_t *error; + u16 max_queue_pairs; u16 tx_ring_sz; u16 rx_ring_sz; + u8 status; + u8 mac_addr[6]; + u64 bar[2]; u8 *host_if_name; u8 *net_ns; u8 *host_bridge; @@ -135,17 +186,27 @@ clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, clib_error_t *virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx); extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring); - +extern void virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring); +extern void virtio_set_net_hdr_size (virtio_if_t * vif); +extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, + u32 type); +extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm, + virtio_if_t * vif, u16 queue_id); format_function_t format_virtio_device_name; 
static_always_inline void -virtio_kick (virtio_vring_t * vring) +virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif) { - u64 x = 1; - int __clib_unused r; - - r = write (vring->kick_fd, &x, sizeof (x)); - vring->last_kick_avail_idx = vring->avail->idx; + if (vif->type == VIRTIO_IF_TYPE_PCI) + virtio_pci_legacy_notify_queue (vm, vif, vring->queue_id); + else + { + u64 x = 1; + int __clib_unused r; + + r = write (vring->kick_fd, &x, sizeof (x)); + vring->last_kick_avail_idx = vring->avail->idx; + } } #endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */ diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c new file mode 100644 index 00000000000..6f70b090034 --- /dev/null +++ b/src/vnet/devices/virtio/virtio_api.c @@ -0,0 +1,237 @@ +/* + *------------------------------------------------------------------ + * virtio_api.c - vnet virtio pci device driver API support + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/ip/ip.h> +#include <vnet/devices/virtio/virtio.h> +#include <vnet/devices/virtio/pci.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_virtio_pci_api_msg \ +_(VIRTIO_PCI_CREATE, virtio_pci_create) \ +_(VIRTIO_PCI_DELETE, virtio_pci_delete) \ +_(SW_INTERFACE_VIRTIO_PCI_DUMP, sw_interface_virtio_pci_dump) + +static void +vl_api_virtio_pci_create_t_handler (vl_api_virtio_pci_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_virtio_pci_create_reply_t *rmp; + vl_api_registration_t *reg; + virtio_pci_create_if_args_t _a, *ap = &_a; + + clib_memset (ap, 0, sizeof (*ap)); + + ap->addr = ntohl (mp->pci_addr); + if (!mp->use_random_mac) + { + clib_memcpy (ap->mac_addr, mp->mac_address, 6); + ap->mac_addr_set = 1; + } + ap->rxq_size = ntohs (mp->rx_ring_sz); + ap->txq_size = ntohs (mp->tx_ring_sz); + ap->sw_if_index = (u32) ~ 0; + ap->features = clib_net_to_host_u64 (mp->features); + + virtio_pci_create_if (vm, ap); + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return;; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (ap->rv); + rmp->sw_if_index = htonl (ap->sw_if_index); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void 
+virtio_pci_send_sw_interface_event_deleted (vpe_api_main_t * am, + vl_api_registration_t * reg, + u32 sw_if_index) +{ + vl_api_sw_interface_event_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_EVENT); + mp->sw_if_index = htonl (sw_if_index); + + mp->admin_up_down = 0; + mp->link_up_down = 0; + mp->deleted = 1; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_virtio_pci_delete_t_handler (vl_api_virtio_pci_delete_t * mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + virtio_main_t *vmx = &virtio_main; + int rv = 0; + vnet_hw_interface_t *hw; + virtio_if_t *vif; + vpe_api_main_t *vam = &vpe_api_main; + vl_api_virtio_pci_delete_reply_t *rmp; + vl_api_registration_t *reg; + u32 sw_if_index = ntohl (mp->sw_if_index); + + hw = vnet_get_sup_hw_interface (vnm, htonl (mp->sw_if_index)); + if (hw == NULL || virtio_device_class.index != hw->dev_class_index) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + goto reply; + } + + vif = pool_elt_at_index (vmx->interfaces, hw->dev_instance); + + rv = virtio_pci_delete_if (vm, vif); + +reply: + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_DELETE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + + vl_api_send_msg (reg, (u8 *) rmp); + + if (!rv) + { + virtio_pci_send_sw_interface_event_deleted (vam, reg, sw_if_index); + } +} + +static void +virtio_pci_send_sw_interface_details (vpe_api_main_t * am, + vl_api_registration_t * reg, + virtio_if_t * vif, u32 context) +{ + vl_api_sw_interface_virtio_pci_details_t *mp; + mp = vl_msg_api_alloc (sizeof (*mp)); + + clib_memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_VIRTIO_PCI_DETAILS); + mp->pci_addr = htonl (vif->pci_addr.as_u32); + mp->sw_if_index = htonl (vif->sw_if_index); + 
mp->rx_ring_sz = htons (vif->rx_ring_sz); + mp->tx_ring_sz = htons (vif->tx_ring_sz); + clib_memcpy (mp->mac_addr, vif->mac_addr, 6); + mp->features = clib_host_to_net_u64 (vif->features); + + mp->context = context; + vl_api_send_msg (reg, (u8 *) mp); +} + +static void + vl_api_sw_interface_virtio_pci_dump_t_handler + (vl_api_sw_interface_virtio_pci_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vl_api_registration_t *reg; + virtio_main_t *vmx = &virtio_main; + virtio_if_t *vif; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (vif, vmx->interfaces, ( + { + if (vif->type == VIRTIO_IF_TYPE_PCI) + { + virtio_pci_send_sw_interface_details + (am, reg, vif, mp->context);} + } + )); +} + +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_virtio; +#undef _ +} + +static clib_error_t * +virtio_pci_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_virtio_pci_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (virtio_pci_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |