aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/devices/virtio
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/devices/virtio')
-rw-r--r--src/vnet/devices/virtio/cli.c208
-rw-r--r--src/vnet/devices/virtio/device.c136
-rw-r--r--src/vnet/devices/virtio/node.c37
-rw-r--r--src/vnet/devices/virtio/pci.c919
-rw-r--r--src/vnet/devices/virtio/pci.h233
-rw-r--r--src/vnet/devices/virtio/virtio.api96
-rw-r--r--src/vnet/devices/virtio/virtio.c165
-rw-r--r--src/vnet/devices/virtio/virtio.h85
-rw-r--r--src/vnet/devices/virtio/virtio_api.c237
9 files changed, 2049 insertions, 67 deletions
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c
new file mode 100644
index 00000000000..82dc5a117b8
--- /dev/null
+++ b/src/vnet/devices/virtio/cli.c
@@ -0,0 +1,208 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/pci.h>
+
+/*
+ * CLI handler for "create interface virtio".
+ *
+ * Parses one line of input into a virtio_pci_create_if_args_t (PCI address,
+ * optional feature mask and rx/tx queue sizes) and passes it to
+ * virtio_pci_create_if ().  Returns 0 when no input line is available,
+ * an "unknown input" error for an unrecognised token, and otherwise
+ * whatever virtio_pci_create_if () left in args.error.
+ */
+static clib_error_t *
+virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  virtio_pci_create_if_args_t args;
+  clib_error_t *error = 0;
+  u32 tmp;
+  u64 feature_mask = (u64) ~ (0ULL);
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  memset (&args, 0, sizeof (args));
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr))
+        ;
+      else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
+        args.features = feature_mask;
+      else if (unformat (line_input, "rx-queue-size %u", &tmp))
+        args.rxq_size = tmp;
+      else if (unformat (line_input, "tx-queue-size %u", &tmp))
+        args.txq_size = tmp;
+      else
+        {
+          /*
+           * Report the unparsed remainder of the line being parsed, and
+           * leave the loop so that line_input is released below instead
+           * of being leaked by an early return.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  virtio_pci_create_if (vm, &args);
+
+  return args.error;
+}
+
+/* *INDENT-OFF* */
+/* Registers the "create interface virtio" CLI command. */
+VLIB_CLI_COMMAND (virtio_pci_create_command, static) = {
+  .path = "create interface virtio",
+  /* Adjacent literals are concatenated: keep the separating space. */
+  .short_help = "create interface virtio <pci-address> "
+                "[feature-mask <hex-mask>] [rx-queue-size <size>] [tx-queue-size <size>]",
+  .function = virtio_pci_create_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "delete interface virtio".
+ *
+ * Accepts either "sw_if_index <n>" or an interface name, checks that the
+ * interface belongs to virtio_device_class, and calls
+ * virtio_pci_delete_if () on the matching virtio_if_t.  Returns 0 on
+ * success (or when no input line is available) and a clib error otherwise.
+ */
+static clib_error_t *
+virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 sw_if_index = ~0;
+  vnet_hw_interface_t *hw;
+  virtio_main_t *vim = &virtio_main;
+  virtio_if_t *vif;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+                         vnm, &sw_if_index))
+        ;
+      else
+        {
+          /*
+           * Build the error from the line being parsed and fall through
+           * to unformat_free () rather than returning with line_input
+           * still allocated.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+                              "please specify interface name or sw_if_index");
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
+    return clib_error_return (0, "not a virtio interface");
+
+  vif = pool_elt_at_index (vim->interfaces, hw->dev_instance);
+
+  /* A negative return is reported as "not a virtio pci interface". */
+  if (virtio_pci_delete_if (vm, vif) < 0)
+    return clib_error_return (0, "not a virtio pci interface");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* Registers the "delete interface virtio" CLI command. */
+VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = {
+  .path = "delete interface virtio",
+  /* Adjacent literals are concatenated: keep the separating space. */
+  .short_help = "delete interface virtio "
+                "{<interface> | sw_if_index <sw_idx>}",
+  .function = virtio_pci_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "show virtio pci".
+ *
+ * Collects the hardware interface indices named on the command line (each
+ * must belong to virtio_device_class), plus the optional "descriptors" /
+ * "desc" and "debug-device" keywords, then calls virtio_show () for
+ * VIRTIO_IF_TYPE_PCI.  When no interface is named, every entry of the
+ * virtio interface pool is shown.  Returns 0 or an "unknown input" error.
+ */
+static clib_error_t *
+show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ virtio_main_t *vmxm = &virtio_main;
+ vnet_main_t *vnm = &vnet_main;
+ virtio_if_t *vif;
+ clib_error_t *error = 0;
+ u32 hw_if_index, *hw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ u8 show_descr = 0, show_device_config = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ /* An interface of another device class is reported as unknown input. */
+ if (virtio_device_class.index != hi->dev_class_index)
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ vec_add1 (hw_if_indices, hw_if_index);
+ }
+ else if (unformat (input, "descriptors") || unformat (input, "desc"))
+ show_descr = 1;
+ else if (unformat (input, "debug-device"))
+ show_device_config = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ /* No interface named: list every interface in the pool. */
+ if (vec_len (hw_if_indices) == 0)
+ {
+ pool_foreach (vif, vmxm->interfaces,
+ vec_add1 (hw_if_indices, vif->hw_if_index);
+ );
+ }
+ /*
+ * "debug-device" only takes effect when at least one interface was named;
+ * hi then refers to the last interface parsed above, so only that device's
+ * config space is dumped.
+ */
+ else if (show_device_config)
+ {
+ vif = pool_elt_at_index (vmxm->interfaces, hi->dev_instance);
+ if (vif->type == VIRTIO_IF_TYPE_PCI)
+ debug_device_config_space (vm, vif);
+ }
+
+ virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_PCI);
+
+ /* Both the success and the error paths release the index vector. */
+done:
+ vec_free (hw_if_indices);
+ return error;
+}
+
+/* *INDENT-OFF* */
+/* Registers the "show virtio pci" CLI command, handled by show_virtio_pci_fn. */
+VLIB_CLI_COMMAND (show_virtio_pci_command, static) = {
+ .path = "show virtio pci",
+ .short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]",
+ .function = show_virtio_pci_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function registered through VLIB_INIT_FUNCTION below.
+ * It performs no work and always returns 0 (no error).
+ */
+clib_error_t *
+virtio_pci_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (virtio_pci_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index 7c66a60cf99..4eeb2caf8a1 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -36,10 +36,10 @@ _(NO_TX_QUEUES, "no tx queues")
typedef enum
{
-#define _(f,s) TAP_TX_ERROR_##f,
+#define _(f,s) VIRTIO_TX_ERROR_##f,
foreach_virtio_tx_func_error
#undef _
- TAP_TX_N_ERROR,
+ VIRTIO_TX_N_ERROR,
} virtio_tx_func_error_t;
static char *virtio_tx_func_error_strings[] = {
@@ -56,11 +56,13 @@ format_virtio_device_name (u8 * s, va_list * args)
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, dev_instance);
if (vif->type == VIRTIO_IF_TYPE_TAP)
- {
- s = format (s, "tap%u", vif->id);
- }
+ s = format (s, "tap%u", vif->id);
+ else if (vif->type == VIRTIO_IF_TYPE_PCI)
+ s = format (s, "virtio-%x/%x/%x/%x", vif->pci_addr.domain,
+ vif->pci_addr.bus, vif->pci_addr.slot,
+ vif->pci_addr.function);
else
- s = format (s, "virtio%lu", vif->dev_instance);
+ s = format (s, "virtio-%lu", vif->dev_instance);
return s;
}
@@ -104,13 +106,6 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring)
{
struct vring_used_elem *e = &vring->used->ring[last & mask];
u16 slot = e->id;
- struct vring_desc *d = &vring->desc[slot];
-
- if (PREDICT_FALSE (d->flags & VRING_DESC_F_INDIRECT))
- {
- d = uword_to_pointer (d->addr, struct vring_desc *);
- vec_free (d);
- }
vlib_buffer_free (vm, &vring->buffers[slot], 1);
used--;
@@ -122,11 +117,12 @@ virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring)
}
static_always_inline u16
-add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi,
- u16 avail, u16 next, u16 mask)
+add_buffer_to_slot (vlib_main_t * vm, virtio_if_t * vif,
+ virtio_vring_t * vring, u32 bi, u16 avail, u16 next,
+ u16 mask)
{
u16 n_added = 0;
- const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
+ int hdr_sz = vif->virtio_net_hdr_sz;
struct vring_desc *d;
d = &vring->desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
@@ -136,31 +132,85 @@ add_buffer_to_slot (vlib_main_t * vm, virtio_vring_t * vring, u32 bi,
if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
{
- d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ d->addr =
+ ((vif->type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm,
+ b) :
+ pointer_to_uword (vlib_buffer_get_current (b))) - hdr_sz;
d->len = b->current_length + hdr_sz;
d->flags = 0;
}
else
{
- struct vring_desc *id, *descs = 0;
-
- /* first buffer in chain */
- vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES);
- id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
- id->len = b->current_length + hdr_sz;
-
- while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ /*
+ * We are using single vlib_buffer_t for indirect descriptor(s)
+ * chain. Single descriptor is 16 bytes and vlib_buffer_t
+ * has 2048 bytes space. So maximum long chain can have 128
+ * (=2048/16) indirect descriptors.
+ * It can easily support 65535 bytes of Jumbo frames with
+ * each data buffer size of 512 bytes minimum.
+ */
+ vlib_buffer_t *indirect_desc =
+ vlib_get_buffer (vm, vring->indirect_buffers[next]);
+ indirect_desc->current_data = 0;
+
+ struct vring_desc *id =
+ (struct vring_desc *) vlib_buffer_get_current (indirect_desc);
+ u32 count = 1;
+ if (vif->type == VIRTIO_IF_TYPE_PCI)
{
- id->flags = VRING_DESC_F_NEXT;
- id->next = vec_len (descs);
- vec_add2_aligned (descs, id, 1, CLIB_CACHE_LINE_BYTES);
- b = vlib_get_buffer (vm, b->next_buffer);
- id->addr = pointer_to_uword (vlib_buffer_get_current (b));
- id->len = b->current_length;
+ d->addr = vlib_physmem_get_pa (vm, id);
+ id->addr = vlib_buffer_get_current_pa (vm, b) - hdr_sz;
+
+ /*
+ * If VIRTIO_F_ANY_LAYOUT is not negotiated, then virtio_net_hdr
+ * should be presented in separate descriptor and data will start
+ * from next descriptor.
+ */
+ if (PREDICT_TRUE
+ (vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)))
+ id->len = b->current_length + hdr_sz;
+ else
+ {
+ id->len = hdr_sz;
+ id->flags = VRING_DESC_F_NEXT;
+ id->next = count;
+ count++;
+ id++;
+ id->addr = vlib_buffer_get_current_pa (vm, b);
+ id->len = b->current_length;
+ }
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ id->flags = VRING_DESC_F_NEXT;
+ id->next = count;
+ count++;
+ id++;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ id->addr = vlib_buffer_get_current_pa (vm, b);
+ id->len = b->current_length;
+ }
}
-
- d->addr = pointer_to_uword (descs);
- d->len = vec_len (descs) * sizeof (struct vring_desc);
+ else /* VIRTIO_IF_TYPE_TAP */
+ {
+ d->addr = pointer_to_uword (id);
+ /* first buffer in chain */
+ id->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ id->len = b->current_length + hdr_sz;
+
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ id->flags = VRING_DESC_F_NEXT;
+ id->next = count;
+ count++;
+ id++;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ id->addr = pointer_to_uword (vlib_buffer_get_current (b));
+ id->len = b->current_length;
+ }
+ }
+ id->flags = 0;
+ id->next = 0;
+ d->len = count * sizeof (struct vring_desc);
d->flags = VRING_DESC_F_INDIRECT;
}
vring->buffers[next] = bi;
@@ -184,8 +234,8 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
clib_spinlock_lock_if_init (&vif->lockp);
if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 &&
- vring->last_kick_avail_idx != vring->avail->idx)
- virtio_kick (vring);
+ (vring->last_kick_avail_idx != vring->avail->idx))
+ virtio_kick (vm, vring, vif);
/* free consumed buffers */
virtio_free_used_desc (vm, vring);
@@ -196,8 +246,11 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
while (n_left && used < sz)
{
- u16 n_added;
- n_added = add_buffer_to_slot (vm, vring, buffers[0], avail, next, mask);
+ u16 n_added = 0;
+ n_added =
+ add_buffer_to_slot (vm, vif, vring, buffers[0], avail, next, mask);
+ if (!n_added)
+ break;
avail += n_added;
next = (next + n_added) & mask;
used += n_added;
@@ -212,13 +265,12 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vring->desc_next = next;
vring->desc_in_use = used;
if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0)
- virtio_kick (vring);
+ virtio_kick (vm, vring, vif);
}
-
if (n_left)
{
- vlib_error_count (vm, node->node_index, TAP_TX_ERROR_NO_FREE_SLOTS,
+ vlib_error_count (vm, node->node_index, VIRTIO_TX_ERROR_NO_FREE_SLOTS,
n_left);
vlib_buffer_free (vm, buffers, n_left);
}
@@ -313,7 +365,7 @@ VNET_DEVICE_CLASS (virtio_device_class) = {
.format_device_name = format_virtio_device_name,
.format_device = format_virtio_device,
.format_tx_trace = format_virtio_tx_trace,
- .tx_function_n_errors = TAP_TX_N_ERROR,
+ .tx_function_n_errors = VIRTIO_TX_N_ERROR,
.tx_function_error_strings = virtio_tx_func_error_strings,
.rx_redirect_to_node = virtio_set_interface_next_node,
.clear_counters = virtio_clear_hw_interface_counters,
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index d7a0b3964b4..c02b607d5fe 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -80,9 +80,9 @@ format_virtio_input_trace (u8 * s, va_list * args)
}
static_always_inline void
-virtio_refill_vring (vlib_main_t * vm, virtio_vring_t * vring)
+virtio_refill_vring (vlib_main_t * vm, virtio_if_t * vif,
+ virtio_vring_t * vring, const int hdr_sz)
{
- const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
u16 used, next, avail, n_slots;
u16 sz = vring->size;
u16 mask = sz - 1;
@@ -108,7 +108,18 @@ more:
{
struct vring_desc *d = &vring->desc[next];;
vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
- d->addr = pointer_to_uword (vlib_buffer_get_current (b)) - hdr_sz;
+ /*
+ * current_data may not be initialized with 0 and may contain
+ * previous offset. Here we want to make sure, it should be 0
+ * initialized.
+ */
+ b->current_data = 0;
+ b->current_data -= hdr_sz;
+ memset (vlib_buffer_get_current (b), 0, hdr_sz);
+ d->addr =
+ ((vif->type == VIRTIO_IF_TYPE_PCI) ? vlib_buffer_get_current_pa (vm,
+ b) :
+ pointer_to_uword (vlib_buffer_get_current (b)));
d->len = VLIB_BUFFER_DATA_SIZE + hdr_sz;
d->flags = VRING_DESC_F_WRITE;
vring->avail->ring[avail & mask] = next;
@@ -123,7 +134,9 @@ more:
vring->desc_in_use = used;
if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0)
- virtio_kick (vring);
+ {
+ virtio_kick (vm, vring, vif);
+ }
goto more;
}
@@ -136,7 +149,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
uword n_trace = vlib_get_trace_count (vm, node);
virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0);
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- const int hdr_sz = sizeof (struct virtio_net_hdr_v1);
+ const int hdr_sz = vif->virtio_net_hdr_sz;
u32 *to_next = 0;
u32 n_rx_packets = 0;
u32 n_rx_bytes = 0;
@@ -146,7 +159,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 &&
vring->last_kick_avail_idx != vring->avail->idx)
- virtio_kick (vring);
+ virtio_kick (vm, vring, vif);
if (n_left == 0)
goto refill;
@@ -159,17 +172,18 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
while (n_left && n_left_to_next)
{
- u16 num_buffers;
+ u16 num_buffers = 1;
struct vring_used_elem *e = &vring->used->ring[last & mask];
struct virtio_net_hdr_v1 *hdr;
u16 slot = e->id;
u16 len = e->len - hdr_sz;
u32 bi0 = vring->buffers[slot];
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- hdr = vlib_buffer_get_current (b0) - hdr_sz;
- num_buffers = hdr->num_buffers;
+ hdr = vlib_buffer_get_current (b0);
+ if (hdr_sz == sizeof (struct virtio_net_hdr_v1))
+ num_buffers = hdr->num_buffers;
- b0->current_data = 0;
+ b0->current_data += hdr_sz;
b0->current_length = len;
b0->total_length_not_including_first_buffer = 0;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -189,7 +203,6 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
cb = vlib_get_buffer (vm, cbi);
/* current buffer */
- cb->current_data = -hdr_sz;
cb->current_length = e->len;
/* previous buffer */
@@ -253,7 +266,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_rx_bytes);
refill:
- virtio_refill_vring (vm, vring);
+ virtio_refill_vring (vm, vif, vring, hdr_sz);
return n_rx_packets;
}
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
new file mode 100644
index 00000000000..63c8c54fdad
--- /dev/null
+++ b/src/vnet/devices/virtio/pci.c
@@ -0,0 +1,919 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <linux/vhost.h>
+#include <sys/eventfd.h>
+#if defined(__x86_64__)
+#include <sys/io.h>
+#endif
+
+#include <vppinfra/types.h>
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vpp/app/version.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/pci.h>
+
+#define PCI_VENDOR_ID_VIRTIO 0x1af4
+#define PCI_DEVICE_ID_VIRTIO_NIC 0x1000
+/* Modern device ID; modern devices are not supported by this driver */
+#define PCI_DEVICE_ID_VIRTIO_NIC_MODERN 0x1041
+
+#define PCI_CAPABILITY_LIST 0x34
+#define PCI_CAP_ID_VNDR 0x09
+#define PCI_CAP_ID_MSIX 0x11
+
+#define PCI_MSIX_ENABLE 0x8000
+
+/*
+ * MSI-X state used by PCI_CONFIG_SIZE.
+ * NOTE(review): this is file-scope, so the value is shared by every device
+ * handled in this file rather than being tracked per interface.
+ */
+static u32 msix_enabled = 0;
+
+/*
+ * Base offset that the legacy config read/write helpers add to the
+ * caller-supplied offset: 24 when msix_enabled == VIRTIO_MSIX_ENABLED,
+ * otherwise 20.
+ */
+#define PCI_CONFIG_SIZE ((msix_enabled == VIRTIO_MSIX_ENABLED) ? \
+ 24 : 20)
+
+/*
+ * Vendor/device ID pairs for the virtio network device (legacy and modern
+ * IDs), terminated by a zeroed entry.
+ * NOTE(review): presumably the match table for PCI driver registration;
+ * the registration itself is not visible here.
+ */
+static pci_device_id_t virtio_pci_device_ids[] = {
+ {
+ .vendor_id = PCI_VENDOR_ID_VIRTIO,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NIC},
+ {
+ .vendor_id = PCI_VENDOR_ID_VIRTIO,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NIC_MODERN},
+ {0},
+};
+
+/*
+ * Read len bytes of device configuration, starting at offset addr
+ * (relative to PCI_CONFIG_SIZE), into dst.  The transfer is split into
+ * the widest I/O accesses that still fit: 4, then 2, then 1 byte.
+ */
+static void
+virtio_pci_legacy_read_config (vlib_main_t * vm, virtio_if_t * vif, void *dst,
+                               int len, u32 addr)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u32 step = 0;
+
+  for (; len > 0; dst = (u8 *) dst + step, addr += step, len -= step)
+    {
+      if (len >= 4)
+        {
+          step = 4;
+          vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+          continue;
+        }
+      if (len >= 2)
+        {
+          step = 2;
+          vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+          continue;
+        }
+      step = 1;
+      vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+    }
+}
+
+/*
+ * Write len bytes from src into the device configuration, starting at
+ * offset addr (relative to PCI_CONFIG_SIZE).  The transfer is split into
+ * the widest I/O accesses that still fit: 4, then 2, then 1 byte.
+ */
+static void
+virtio_pci_legacy_write_config (vlib_main_t * vm, virtio_if_t * vif,
+                                void *src, int len, u32 addr)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u32 step = 0;
+
+  for (; len > 0; src = (u8 *) src + step, addr += step, len -= step)
+    {
+      if (len >= 4)
+        {
+          step = 4;
+          vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, src);
+          continue;
+        }
+      if (len >= 2)
+        {
+          step = 2;
+          vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, src);
+          continue;
+        }
+      step = 1;
+      vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, src);
+    }
+}
+
+/*
+ * Read the 32-bit VIRTIO_PCI_HOST_FEATURES register and return it
+ * widened to u64.
+ */
+static u64
+virtio_pci_legacy_get_features (vlib_main_t * vm, virtio_if_t * vif)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u32 host_features;
+
+  vlib_pci_read_io_u32 (vm, h, VIRTIO_PCI_HOST_FEATURES, &host_features);
+  return host_features;
+}
+
+/*
+ * Write the low 32 bits of features to VIRTIO_PCI_GUEST_FEATURES and
+ * return the value read back from that register.  A warning is logged
+ * when any of the upper 32 bits are set; they are dropped.
+ */
+static u32
+virtio_pci_legacy_set_features (vlib_main_t * vm, virtio_if_t * vif,
+                                u64 features)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u32 requested = (u32) features;
+  u32 accepted = 0;
+
+  if ((features >> 32) != 0)
+    clib_warning ("only 32 bit features are allowed for legacy virtio!");
+
+  vlib_pci_write_io_u32 (vm, h, VIRTIO_PCI_GUEST_FEATURES, &requested);
+  vlib_pci_read_io_u32 (vm, h, VIRTIO_PCI_GUEST_FEATURES, &accepted);
+  return accepted;
+}
+
+/* Return the current value of the VIRTIO_PCI_STATUS register. */
+static u8
+virtio_pci_legacy_get_status (vlib_main_t * vm, virtio_if_t * vif)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u8 current = 0;
+
+  vlib_pci_read_io_u8 (vm, h, VIRTIO_PCI_STATUS, &current);
+  return current;
+}
+
+/*
+ * Write status to VIRTIO_PCI_STATUS.  Unless status is
+ * VIRTIO_CONFIG_STATUS_RESET, the bits currently set in the register are
+ * OR-ed in first, so status bits accumulate.
+ */
+static void
+virtio_pci_legacy_set_status (vlib_main_t * vm, virtio_if_t * vif, u8 status)
+{
+  u8 value = status;
+
+  if (value != VIRTIO_CONFIG_STATUS_RESET)
+    value |= virtio_pci_legacy_get_status (vm, vif);
+
+  vlib_pci_write_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &value);
+}
+
+/*
+ * Write VIRTIO_CONFIG_STATUS_RESET to the status register and return the
+ * status read back afterwards.
+ */
+static u8
+virtio_pci_legacy_reset (vlib_main_t * vm, virtio_if_t * vif)
+{
+  u8 after_reset;
+
+  virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_RESET);
+  after_reset = virtio_pci_legacy_get_status (vm, vif);
+  return after_reset;
+}
+
+/* Return the current value of the VIRTIO_PCI_ISR register. */
+static u8
+virtio_pci_legacy_get_isr (vlib_main_t * vm, virtio_if_t * vif)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u8 cause = 0;
+
+  vlib_pci_read_io_u8 (vm, h, VIRTIO_PCI_ISR, &cause);
+  return cause;
+}
+
+/*
+ * Select queue queue_id through VIRTIO_PCI_QUEUE_SEL and return the value
+ * the device then reports in VIRTIO_PCI_QUEUE_NUM.
+ */
+static u16
+virtio_pci_legacy_get_queue_num (vlib_main_t * vm, virtio_if_t * vif,
+                                 u16 queue_id)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u16 n_entries = 0;
+
+  vlib_pci_write_io_u16 (vm, h, VIRTIO_PCI_QUEUE_SEL, &queue_id);
+  vlib_pci_read_io_u16 (vm, h, VIRTIO_PCI_QUEUE_NUM, &n_entries);
+  return n_entries;
+}
+
+
+/*
+ * Tell the device where queue queue_id lives: select the queue, then write
+ * the physical address of p, shifted right by VIRTIO_PCI_QUEUE_ADDR_SHIFT,
+ * to the 32-bit VIRTIO_PCI_QUEUE_PFN register.
+ */
+static void
+virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif,
+                               u16 queue_id, void *p)
+{
+  u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+  /*
+   * Narrow by value rather than by reinterpreting &addr as a u32 pointer:
+   * the pointer cast picks whichever half of the u64 is stored first and
+   * is therefore only correct on little-endian hosts.
+   */
+  u32 pfn = (u32) addr;
+
+  if ((addr >> 32) != 0)
+    clib_warning ("queue %u address does not fit the 32 bit PFN register",
+                  queue_id);
+
+  vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL,
+                         &queue_id);
+  vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN,
+                         &pfn);
+}
+
+/*
+ * Detach queue queue_id from the device: select it, then write 0 to
+ * VIRTIO_PCI_QUEUE_PFN.
+ */
+static void
+virtio_pci_legacy_del_queue (vlib_main_t * vm, virtio_if_t * vif,
+                             u16 queue_id)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u32 no_pfn = 0;
+
+  vlib_pci_write_io_u16 (vm, h, VIRTIO_PCI_QUEUE_SEL, &queue_id);
+  vlib_pci_write_io_u32 (vm, h, VIRTIO_PCI_QUEUE_PFN, &no_pfn);
+}
+
+/* Notify the device about queue queue_id by writing it to VIRTIO_PCI_QUEUE_NOTIFY. */
+inline void
+virtio_pci_legacy_notify_queue (vlib_main_t * vm, virtio_if_t * vif,
+ u16 queue_id)
+{
+ vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY,
+ &queue_id);
+}
+
+/*
+ * Enable one vector (0) for the Link State Interrupt: write vec to
+ * VIRTIO_MSI_CONFIG_VECTOR and return the value read back from it.
+ */
+static u16
+virtio_pci_legacy_set_config_irq (vlib_main_t * vm, virtio_if_t * vif,
+                                  u16 vec)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u16 vector = vec;
+
+  vlib_pci_write_io_u16 (vm, h, VIRTIO_MSI_CONFIG_VECTOR, &vector);
+  vlib_pci_read_io_u16 (vm, h, VIRTIO_MSI_CONFIG_VECTOR, &vector);
+  return vector;
+}
+
+/*
+ * Select queue queue_id, write vec to VIRTIO_MSI_QUEUE_VECTOR and return
+ * the value read back from that register.
+ */
+static u16
+virtio_pci_legacy_set_queue_irq (vlib_main_t * vm, virtio_if_t * vif, u16 vec,
+                                 u16 queue_id)
+{
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+  u16 vector = vec;
+
+  vlib_pci_write_io_u16 (vm, h, VIRTIO_PCI_QUEUE_SEL, &queue_id);
+  vlib_pci_write_io_u16 (vm, h, VIRTIO_MSI_QUEUE_VECTOR, &vector);
+  vlib_pci_read_io_u16 (vm, h, VIRTIO_MSI_QUEUE_VECTOR, &vector);
+  return vector;
+}
+
+/*
+ * Interface flag-change callback stub: ignores its arguments and always
+ * returns 0.
+ */
+static u32
+virtio_pci_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw,
+ u32 flags)
+{
+ return 0;
+}
+
+/*
+ * Determine how many virtqueue pairs the device offers and store the
+ * result in vif->max_queue_pairs.
+ *
+ * Without VIRTIO_NET_F_MQ in vif->features the count is 1; otherwise it
+ * is read from the device configuration at offset 8.  Returns 0 on
+ * success, or an error (leaving vif->max_queue_pairs untouched) when the
+ * value is outside 1..0x8000.
+ */
+static clib_error_t *
+virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif)
+{
+  virtio_net_config_t config;
+  u16 max_queue_pairs = 1;
+
+  if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ))
+    {
+      virtio_pci_legacy_read_config (vm, vif, &config.max_virtqueue_pairs,
+                                     sizeof (config.max_virtqueue_pairs), 8);
+      max_queue_pairs = config.max_virtqueue_pairs;
+    }
+
+  /*
+   * The error object has to be returned to the caller; building it and
+   * discarding the result both hides the failure and leaks the error.
+   */
+  if (max_queue_pairs < 1 || max_queue_pairs > 0x8000)
+    return clib_error_return (0, "max queue pair is %x", max_queue_pairs);
+
+  vif->max_queue_pairs = max_queue_pairs;
+  return 0;
+}
+
+/* Write vif->mac_addr to the device configuration at offset 0. */
+static void
+virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif)
+{
+ virtio_pci_legacy_write_config (vm, vif, vif->mac_addr,
+ sizeof (vif->mac_addr), 0);
+}
+
+/*
+ * Fetch the MAC address from the device configuration (offset 0) into
+ * vif->mac_addr.  Returns 0 when the address was read, or 1 when the
+ * device does not advertise VIRTIO_NET_F_MAC and nothing was read.
+ */
+static u32
+virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif)
+{
+  if (!(vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)))
+    return 1;
+
+  virtio_pci_legacy_read_config (vm, vif, vif->mac_addr,
+                                 sizeof (vif->mac_addr), 0);
+  return 0;
+}
+
+/*
+ * Return the link status word.  Devices that do not advertise
+ * VIRTIO_NET_F_STATUS are assumed to be up (1); otherwise the 16-bit
+ * status is read from the device configuration at offset 6, directly
+ * after the MAC address.
+ */
+static u16
+virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif)
+{
+  u16 link = 1;
+
+  if (!(vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)))
+    return link;
+
+  virtio_pci_legacy_read_config (vm, vif, &link, sizeof (link), 6);
+  return link;
+}
+
+/*
+ * Queue interrupt: look up the interface from the PCI handle's private
+ * data and mark device input as interrupt-pending, using line as the
+ * queue id.
+ */
+static void
+virtio_pci_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  uword if_index = vlib_pci_get_private_data (vm, h);
+  virtio_if_t *vif = pool_elt_at_index (virtio_main.interfaces, if_index);
+
+  vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, line);
+}
+
+/*
+ * Configuration-change interrupt: re-read the link status and mirror it
+ * into both vif->flags (VIRTIO_IF_FLAG_ADMIN_UP) and the hardware
+ * interface flags.
+ */
+static void
+virtio_pci_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  uword if_index = vlib_pci_get_private_data (vm, h);
+  virtio_if_t *vif = pool_elt_at_index (virtio_main.interfaces, if_index);
+  u16 link = virtio_pci_is_link_up (vm, vif);
+
+  if (!(link & VIRTIO_NET_S_LINK_UP))
+    {
+      vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
+      vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+      return;
+    }
+
+  vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+  vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
+                               VNET_HW_INTERFACE_FLAG_LINK_UP);
+}
+
+/*
+ * Interrupt entry point: read the ISR register once and dispatch on its
+ * bits.  VIRTIO_PCI_ISR_INTR goes to the queue handler and
+ * VIRTIO_PCI_ISR_CONFIG to the configuration-change handler; both are
+ * called with line 0.
+ */
+static void
+virtio_pci_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h)
+{
+  uword if_index = vlib_pci_get_private_data (vm, h);
+  virtio_if_t *vif = pool_elt_at_index (virtio_main.interfaces, if_index);
+  u8 cause = virtio_pci_legacy_get_isr (vm, vif);
+
+  /*
+   * Lower bit set: some virtqueue of the device has made progress that
+   * needs servicing.
+   */
+  if (cause & VIRTIO_PCI_ISR_INTR)
+    virtio_pci_irq_0_handler (vm, h, 0);
+
+  if (cause & VIRTIO_PCI_ISR_CONFIG)
+    virtio_pci_irq_1_handler (vm, h, 0);
+}
+
+/*
+ * Print vif->status to the CLI, followed by one line for every flag from
+ * foreach_virtio_config_status_flags that is set in it.
+ */
+inline void
+device_status (vlib_main_t * vm, virtio_if_t * vif)
+{
+  struct status_struct
+  {
+    u8 bit;
+    char *str;
+  };
+  /* Table of known flags, terminated by an entry whose name is NULL. */
+  static struct status_struct status_array[] = {
+#define _(s,b) { .str = #s, .bit = b, },
+    foreach_virtio_config_status_flags
+#undef _
+    {.str = NULL}
+  };
+  struct status_struct *flag;
+
+  vlib_cli_output (vm, " status 0x%x", vif->status);
+
+  for (flag = status_array; flag->str; flag++)
+    {
+      if (!(vif->status & flag->bit))
+        continue;
+      vlib_cli_output (vm, " %s (%x)", flag->str, flag->bit);
+    }
+}
+
+/*
+ * Dump the legacy virtio registers and the device-specific configuration
+ * (MAC, link status, number of virtqueues, MTU) of vif to the CLI,
+ * followed by a raw dump of the remaining I/O space up to offset 64.
+ *
+ * All 32-bit values are printed with "%x": the arguments are u32, so a
+ * long-sized conversion would fetch more bytes than were passed.
+ */
+inline void
+debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif)
+{
+  u32 data_u32;
+  u16 data_u16;
+  u8 data_u8;
+  vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES,
+                        &data_u32);
+  vlib_cli_output (vm, "remote features 0x%x", data_u32);
+  vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES,
+                        &data_u32);
+  vlib_cli_output (vm, "guest features 0x%x", data_u32);
+  vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN,
+                        &data_u32);
+  vlib_cli_output (vm, "queue address 0x%x", data_u32);
+  vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM,
+                        &data_u16);
+  vlib_cli_output (vm, "queue size 0x%x", data_u16);
+  vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL,
+                        &data_u16);
+  vlib_cli_output (vm, "queue select 0x%x", data_u16);
+  vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY,
+                        &data_u16);
+  vlib_cli_output (vm, "queue notify 0x%x", data_u16);
+  vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &data_u8);
+  vlib_cli_output (vm, "status 0x%x", data_u8);
+  vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8);
+  vlib_cli_output (vm, "isr 0x%x", data_u8);
+
+  /* Device-specific configuration; offsets are relative to PCI_CONFIG_SIZE. */
+  u8 mac[6];
+  virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0);
+  vlib_cli_output (vm, "mac %U", format_ethernet_address, mac);
+  virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16),
+                                 /* offset to status */ 6);
+  vlib_cli_output (vm, "link up/down status 0x%x", data_u16);
+  virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16),
+                                 /* offset to max_virtqueue */ 8);
+  vlib_cli_output (vm, "num of virtqueue 0x%x", data_u16);
+  virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16),
+                                 /* offset to mtu */ 10);
+  vlib_cli_output (vm, "mtu 0x%x", data_u16);
+
+  /*
+   * Raw dump of what follows the fields above.
+   * NOTE(review): "i += a; i &= ~a" with a == 4 adds 4 and then clears
+   * bit 2; it looks like an alignment step but is not a conventional
+   * round-up -- confirm the intended start offset.
+   */
+  u32 i = PCI_CONFIG_SIZE + 12, a = 4;
+  i += a;
+  i &= ~a;
+  for (; i < 64; i += 4)
+    {
+      u32 data = 0;
+      vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, i, &data);
+      vlib_cli_output (vm, "0x%x", data);
+    }
+}
+
+/*
+ * Return 1 when qsz lies in [64, 4096] and is a multiple of 64,
+ * otherwise 0.
+ */
+static u8
+virtio_pci_queue_size_valid (u16 qsz)
+{
+  int in_range = (qsz >= 64) && (qsz <= 4096);
+  int multiple_of_64 = (qsz % 64) == 0;
+
+  return (in_range && multiple_of_64) ? 1 : 0;
+}
+
+/*
+ * Set up virtqueue idx of vif.
+ *
+ * Reads the queue size from the device (0 is replaced by 256), allocates
+ * and zeroes the ring memory, fills in vif->vrings[idx], and hands the
+ * ring address to the device.  Odd queue indices additionally get one
+ * indirect-descriptor buffer per slot and set vif->tx_ring_sz; even
+ * indices set vif->rx_ring_sz.
+ *
+ * Returns 0 on success, or an error when the size is not a power of two,
+ * exceeds 32768, or the ring memory cannot be allocated.
+ */
+clib_error_t *
+virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx)
+{
+  clib_error_t *error = 0;
+  u16 queue_size = 0;
+  virtio_vring_t *vring;
+  struct vring vr;
+  u32 i = 0;
+  void *ptr;
+
+  queue_size = virtio_pci_legacy_get_queue_num (vm, vif, idx);
+  /* Only a warning: the hard limits are enforced by the checks below. */
+  if (!virtio_pci_queue_size_valid (queue_size))
+    clib_warning ("queue size is not valid");
+
+  if (!is_pow2 (queue_size))
+    return clib_error_return (0, "ring size must be power of 2");
+
+  if (queue_size > 32768)
+    return clib_error_return (0, "ring size must be 32768 or lower");
+
+  if (queue_size == 0)
+    queue_size = 256;
+
+  /*
+   * Allocate the ring memory before the vring slot is created, so that a
+   * failed allocation leaves vif->vrings untouched, and never memset a
+   * null pointer.
+   */
+  i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
+  i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
+  ptr = vlib_physmem_alloc_aligned (vm, i, VIRTIO_PCI_VRING_ALIGN);
+  if (ptr == 0)
+    return clib_error_return (0, "vring memory allocation failed");
+  memset (ptr, 0, i);
+
+  vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES);
+  vring = vec_elt_at_index (vif->vrings, idx);
+
+  vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
+  vring->desc = vr.desc;
+  vring->avail = vr.avail;
+  vring->used = vr.used;
+  vring->queue_id = idx;
+  vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+
+  ASSERT (vring->buffers == 0);
+  vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES);
+  ASSERT (vring->indirect_buffers == 0);
+  vec_validate_aligned (vring->indirect_buffers, queue_size,
+                        CLIB_CACHE_LINE_BYTES);
+  if (idx % 2)
+    {
+      /*
+       * One indirect-descriptor buffer per slot.  The count returned by
+       * each call is accumulated, so a partial allocation resumes after
+       * the buffers already obtained instead of overwriting them.
+       * As before, this keeps retrying until all buffers are allocated.
+       */
+      u32 n_alloc = 0;
+      while (n_alloc < queue_size)
+        n_alloc += vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc,
+                                      queue_size - n_alloc);
+      vif->tx_ring_sz = queue_size;
+    }
+  else
+    vif->rx_ring_sz = queue_size;
+  vring->size = queue_size;
+
+  virtio_pci_legacy_setup_queue (vm, vif, idx, ptr);
+  vring->kick_fd = -1;
+
+  return error;
+}
+
+/*
+ * Negotiate the feature set with the device and store the result in
+ * vif->features.
+ *
+ * req_features == 0 means "request everything this driver supports".
+ * The request is intersected with the device's features and the
+ * supported set; VIRTIO_NET_F_MTU is dropped again when the device
+ * reports an MTU below 64.  The final value is whatever the device
+ * reads back after the guest features are written.
+ */
+static void
+virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif,
+                           u64 req_features)
+{
+  const u64 mtu_bit = VIRTIO_FEATURE (VIRTIO_NET_F_MTU);
+  u64 supported = mtu_bit
+    | VIRTIO_FEATURE (VIRTIO_NET_F_MAC)
+    | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)
+    | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)
+    | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)
+    | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
+  u64 wanted = (req_features == 0) ? supported : req_features;
+
+  vif->features = wanted & vif->remote_features & supported;
+
+  if (vif->remote_features & vif->features & mtu_bit)
+    {
+      virtio_net_config_t net_cfg;
+
+      /* The MTU field sits at offset 10 of the device configuration. */
+      virtio_pci_legacy_read_config (vm, vif, &net_cfg.mtu,
+                                     sizeof (net_cfg.mtu), 10);
+      if (net_cfg.mtu < 64)
+        vif->features &= ~mtu_bit;
+    }
+
+  vif->features = virtio_pci_legacy_set_features (vm, vif, vif->features);
+}
+
+/*
+ * Fetch the feature bits offered by the device through the legacy
+ * interface and cache them in vif->remote_features.
+ */
+void
+virtio_pci_read_device_feature (vlib_main_t * vm, virtio_if_t * vif)
+{
+  u64 offered = virtio_pci_legacy_get_features (vm, vif);
+
+  vif->remote_features = offered;
+}
+
+/*
+ * Reset the device, then set the ACK and DRIVER status bits.
+ *
+ * Returns 0 and records the status read back in vif->status when both
+ * bits are present in it, -1 (leaving vif->status untouched) otherwise.
+ */
+int
+virtio_pci_reset_device (vlib_main_t * vm, virtio_if_t * vif)
+{
+  const u8 required =
+    VIRTIO_CONFIG_STATUS_ACK | VIRTIO_CONFIG_STATUS_DRIVER;
+  u8 readback;
+
+  /* reset first, then set ACK and DRIVER one write at a time */
+  virtio_pci_legacy_reset (vm, vif);
+  virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_ACK);
+  virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER);
+
+  /* both bits must be visible in the status read back */
+  readback = virtio_pci_legacy_get_status (vm, vif);
+  if ((readback & required) != required)
+    return -1;
+
+  vif->status = readback;
+  return 0;
+}
+
+/*
+ * Walk the PCI capability list of the device.
+ *
+ * Side effect: when an MSI-X capability is found, the file-level
+ * msix_enabled is set to VIRTIO_MSIX_ENABLED or VIRTIO_MSIX_DISABLED
+ * according to its PCI_MSIX_ENABLE flag.
+ *
+ * Returns:
+ *   - an error as soon as a config-space read fails (the walk stops, since
+ *     the next-capability pointer cannot be trusted after a failed read);
+ *   - 0 when the four vendor capabilities of a modern device (common,
+ *     notify, device and ISR config) are not all present;
+ *   - an error carrying "modern virtio pci device found" when they are.
+ * The caller owns any returned error.
+ */
+clib_error_t *
+virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif)
+{
+  clib_error_t *error = 0;
+  struct virtio_pci_cap cap;
+  u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0;
+  vlib_pci_dev_handle_t h = vif->pci_dev_handle;
+
+  if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos)))
+    return clib_error_return (error,
+			      "error in reading capabilty list position");
+
+  while (pos)
+    {
+      if ((error =
+	   vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap,
+				       sizeof (cap))))
+	return clib_error_return (error,
+				  "error in reading the capability at [%2x]",
+				  pos);
+
+      if (cap.cap_vndr == PCI_CAP_ID_MSIX)
+	{
+	  u16 flags;
+	  if ((error =
+	       vlib_pci_read_write_config (vm, h, VLIB_READ, pos + 2, &flags,
+					   sizeof (flags))))
+	    return clib_error_return (error,
+				      "error in reading the capability at [%2x]",
+				      pos + 2);
+
+	  if (flags & PCI_MSIX_ENABLE)
+	    msix_enabled = VIRTIO_MSIX_ENABLED;
+	  else
+	    msix_enabled = VIRTIO_MSIX_DISABLED;
+	}
+
+      if (cap.cap_vndr != PCI_CAP_ID_VNDR)
+	{
+	  clib_warning ("[%2x] skipping non VNDR cap id: %2x", pos,
+			cap.cap_vndr);
+	  goto next;
+	}
+
+      switch (cap.cfg_type)
+	{
+	case VIRTIO_PCI_CAP_COMMON_CFG:
+	  common_cfg = 1;
+	  break;
+	case VIRTIO_PCI_CAP_NOTIFY_CFG:
+	  notify_base = 1;
+	  break;
+	case VIRTIO_PCI_CAP_DEVICE_CFG:
+	  dev_cfg = 1;
+	  break;
+	case VIRTIO_PCI_CAP_ISR_CFG:
+	  isr = 1;
+	  break;
+	default:
+	  /* other vendor capability types are not needed here */
+	  break;
+	}
+    next:
+      pos = cap.cap_next;
+    }
+
+  if (common_cfg == 0 || notify_base == 0 || dev_cfg == 0 || isr == 0)
+    {
+      clib_warning ("no modern virtio pci device found");
+      return error;
+    }
+
+  return clib_error_return (error, "modern virtio pci device found");
+}
+
+/*
+ * Bring the device from reset to DRIVER_OK through the legacy interface:
+ * scan capabilities, reset, negotiate features, obtain (or generate) a
+ * MAC address, set up queues 0 and 1 and finally set DRIVER_OK.
+ *
+ * vm   - vlib main
+ * vif  - interface being initialised; pci_dev_handle must be valid
+ * args - creation arguments; args->features is the requested feature set
+ *
+ * Returns 0 on success or the error describing the step that failed.
+ */
+static clib_error_t *
+virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
+			virtio_pci_create_if_args_t * args)
+{
+  clib_error_t *error = 0;
+  u8 status = 0;
+
+  /*
+   * The capability scan is run for its side effect on msix_enabled. Its
+   * return value is not treated as fatal here, so release it rather than
+   * leak it.
+   */
+  error = virtio_pci_read_caps (vm, vif);
+  clib_error_free (error);
+
+  if (virtio_pci_reset_device (vm, vif) < 0)
+    return clib_error_return (error, "Failed to reset the device");
+
+  /*
+   * read device features and negotiate (user) requested features
+   */
+  virtio_pci_read_device_feature (vm, vif);
+  virtio_negotiate_features (vm, vif, args->features);
+
+  /*
+   * After FEATURE_OK, driver should not accept new feature bits
+   */
+  virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+  status = virtio_pci_legacy_get_status (vm, vif);
+  if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
+    return clib_error_return (error,
+			      "Device doesn't support requested features");
+
+  vif->status = status;
+
+  if (virtio_pci_get_mac (vm, vif))
+    {
+      /*
+       * virtio_pci_get_mac () returned non-zero: build a MAC of the form
+       * 02:fe:xx:xx:xx:xx from a time-seeded random value and hand it
+       * to virtio_pci_set_mac ().
+       */
+      f64 now = vlib_time_now (vm);
+      u32 rnd;
+      rnd = (u32) (now * 1e6);
+      rnd = random_u32 (&rnd);
+
+      memcpy (vif->mac_addr + 2, &rnd, sizeof (rnd));
+      vif->mac_addr[0] = 2;
+      vif->mac_addr[1] = 0xfe;
+      virtio_pci_set_mac (vm, vif);
+    }
+
+  virtio_set_net_hdr_size (vif);
+
+  if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif)))
+    goto error;
+
+  if ((error = virtio_pci_vring_init (vm, vif, 0)))
+    goto error;
+
+  if ((error = virtio_pci_vring_init (vm, vif, 1)))
+    goto error;
+
+  if (msix_enabled == VIRTIO_MSIX_ENABLED)
+    {
+      virtio_pci_legacy_set_config_irq (vm, vif, VIRTIO_MSI_NO_VECTOR);
+      virtio_pci_legacy_set_queue_irq (vm, vif, VIRTIO_MSI_NO_VECTOR, 0);
+    }
+  virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+  vif->status = virtio_pci_legacy_get_status (vm, vif);
+error:
+  return error;
+}
+
+/*
+ * Create a virtio PCI interface.
+ *
+ * vm   - vlib main
+ * args - in:  addr (PCI address as u32), rxq_size / txq_size (0 selects
+ *             VIRTIO_NUM_RX_DESC / VIRTIO_NUM_TX_DESC), features
+ *        out: rv, error and, on success, sw_if_index
+ *
+ * On failure args->rv is set to a VNET_API_ERROR_* code and args->error
+ * describes the problem; nothing stays allocated for the interface.
+ */
+void
+virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_main_t *vmxm = &virtio_main;
+  virtio_if_t *vif;
+  vlib_pci_dev_handle_t h;
+  clib_error_t *error = 0;
+
+  if (args->rxq_size == 0)
+    args->rxq_size = VIRTIO_NUM_RX_DESC;
+  if (args->txq_size == 0)
+    args->txq_size = VIRTIO_NUM_TX_DESC;
+
+  if (!virtio_pci_queue_size_valid (args->rxq_size) ||
+      !virtio_pci_queue_size_valid (args->txq_size))
+    {
+      args->rv = VNET_API_ERROR_INVALID_VALUE;
+      args->error =
+	clib_error_return (error,
+			   "queue size must be <= 4096, >= 64, "
+			   "and multiples of 64");
+      return;
+    }
+
+  /* refuse a PCI address that already backs an interface */
+  /* *INDENT-OFF* */
+  pool_foreach (vif, vmxm->interfaces, ({
+    if (vif->pci_addr.as_u32 == args->addr)
+      {
+	args->rv = VNET_API_ERROR_INVALID_VALUE;
+	args->error =
+	  clib_error_return (error, "PCI address in use");
+	return;
+      }
+  }));
+  /* *INDENT-ON* */
+
+  pool_get (vmxm->interfaces, vif);
+  /*
+   * Start from a clean slate so the error path below can run
+   * virtio_pci_delete_if () on a partially initialised interface
+   * (vrings == 0, hw_if_index == 0, error == 0).
+   */
+  memset (vif, 0, sizeof (*vif));
+  vif->dev_instance = vif - vmxm->interfaces;
+  vif->per_interface_next_index = ~0;
+  vif->pci_addr.as_u32 = args->addr;
+
+  if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0)
+    {
+      /*
+       * Nothing but the pool slot exists yet: release it here and keep
+       * the syscall error instead of letting the common error path
+       * overwrite it.
+       */
+      args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
+      args->error = clib_error_return_unix (0, "open '/dev/vhost-net'");
+      pool_put (vmxm->interfaces, vif);
+      return;
+    }
+
+  if ((error =
+       vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr,
+			     virtio_pci_device_ids, &h)))
+    {
+      /* build the message while vif is still owned, then release all */
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error =
+	clib_error_return (error, "pci-addr %U", format_vlib_pci_addr,
+			   &vif->pci_addr);
+      close (vif->fd);
+      pool_put (vmxm->interfaces, vif);
+      return;
+    }
+  vif->pci_dev_handle = h;
+  vlib_pci_set_private_data (vm, h, vif->dev_instance);
+
+  /*
+   * virtio_pci_delete_if () ignores interfaces whose type is not PCI, so
+   * the type has to be in place before the first "goto error"; otherwise
+   * the PCI handle, the fd and the pool slot are never released.
+   */
+  vif->type = VIRTIO_IF_TYPE_PCI;
+
+  if ((error = vlib_pci_bus_master_enable (vm, h)))
+    goto error;
+
+  if ((error = vlib_pci_io_region (vm, h, 0)))
+    goto error;
+
+  if ((error = virtio_pci_device_init (vm, vif, args)))
+    goto error;
+
+  if (msix_enabled == VIRTIO_MSIX_ENABLED)
+    {
+      if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1,
+						   &virtio_pci_irq_0_handler)))
+	goto error;
+
+      if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1,
+						   &virtio_pci_irq_1_handler)))
+	goto error;
+
+      if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2)))
+	goto error;
+    }
+  else
+    {
+      vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler);
+    }
+
+  if ((error = vlib_pci_intr_enable (vm, h)))
+    goto error;
+
+  /* create interface */
+  error = ethernet_register_interface (vnm, virtio_device_class.index,
+				       vif->dev_instance, vif->mac_addr,
+				       &vif->hw_if_index,
+				       virtio_pci_flag_change);
+
+  if (error)
+    goto error;
+
+  vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
+  vif->sw_if_index = sw->sw_if_index;
+  args->sw_if_index = sw->sw_if_index;
+
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
+  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
+  vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
+				    virtio_input_node.index);
+  vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
+
+  /* mirror the link state reported by the device */
+  if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
+    {
+      vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
+      vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
+				   VNET_HW_INTERFACE_FLAG_LINK_UP);
+    }
+  else
+    vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+  return;
+
+error:
+  virtio_pci_delete_if (vm, vif);
+  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+  args->error = error;
+}
+/*
+ * Tear down a virtio PCI interface and return its slot to the pool.
+ *
+ * vm  - vlib main
+ * vif - interface to delete
+ *
+ * Returns VNET_API_ERROR_INVALID_INTERFACE (and touches nothing) when
+ * vif->type is not VIRTIO_IF_TYPE_PCI, 0 otherwise.
+ */
+int
+virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_main_t *vmxm = &virtio_main;
+ u32 i = 0;
+ /* only interfaces created by the PCI path are handled here */
+ if (vif->type != VIRTIO_IF_TYPE_PCI)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+ /* device side first: interrupts off, queues 0 and 1 removed, reset */
+ vlib_pci_intr_disable (vm, vif->pci_dev_handle);
+
+ virtio_pci_legacy_del_queue (vm, vif, 0);
+ virtio_pci_legacy_del_queue (vm, vif, 1);
+
+ virtio_pci_legacy_reset (vm, vif);
+ /* NOTE(review): hw_if_index 0 is treated as "never registered" */
+ if (vif->hw_if_index)
+ {
+ vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+ vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0);
+ ethernet_delete_interface (vnm, vif->hw_if_index);
+ }
+
+ vlib_pci_device_close (vm, vif->pci_dev_handle);
+ /* release per-ring resources: odd index is TX, even index is RX */
+ vec_foreach_index (i, vif->vrings)
+ {
+ virtio_vring_t *vring = vec_elt_at_index (vif->vrings, i);
+ if (vring->kick_fd != -1)
+ close (vring->kick_fd);
+ if (vring->used)
+ {
+ if ((i & 1) == 1)
+ virtio_free_used_desc (vm, vring);
+ else
+ virtio_free_rx_buffers (vm, vring);
+ }
+ if (vring->queue_id % 2) /* indirect buffers are only allocated for odd queues */
+ {
+ vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size);
+ }
+ vec_free (vring->buffers);
+ vec_free (vring->indirect_buffers);
+ vlib_physmem_free (vm, vring->desc);
+ }
+
+ vec_free (vif->vrings);
+
+ if (vif->fd != -1)
+ close (vif->fd);
+ if (vif->tap_fd != -1) /* NOTE(review): tap_fd shares a union with pci_dev_handle in virtio_if_t */
+ vif->tap_fd = -1;
+ clib_error_free (vif->error);
+ memset (vif, 0, sizeof (*vif)); /* leave the pool slot zeroed for its next user */
+ pool_put (vmxm->interfaces, vif);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h
new file mode 100644
index 00000000000..7552dd89ac2
--- /dev/null
+++ b/src/vnet/devices/virtio/pci.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_virtio_pci_h__
+#define __included_virtio_pci_h__
+
+/* VirtIO ABI version, this must match exactly. */
+#define VIRTIO_PCI_ABI_VERSION 0
+
+/*
+ * VirtIO Header, located in BAR 0.
+ */
+#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO) */
+#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */
+#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */
+#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */
+#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */
+#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */
+#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */
+#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading
+ * also clears the register (8, RO) */
+/* Only if MSIX is enabled: */
+#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */
+#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications
+ (16, RW) */
+
+/*
+ * define in include/linux/virtio_pci.h
+ * #define VIRTIO_MSI_NO_VECTOR 0xFFFF
+ */
+
+/* The bit of the ISR which indicates a device has an interrupt. */
+#define VIRTIO_PCI_ISR_INTR 0x1
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG 0x2
+/* Vector value used to disable MSI for queue: VIRTIO_MSI_NO_VECTOR (see above). */
+
+/* VirtIO device IDs. */
+#define VIRTIO_ID_NETWORK 0x01
+
+/* Status byte for guest to report progress. */
+#define foreach_virtio_config_status_flags \
+ _ (VIRTIO_CONFIG_STATUS_RESET, 0x00) \
+ _ (VIRTIO_CONFIG_STATUS_ACK, 0x01) \
+ _ (VIRTIO_CONFIG_STATUS_DRIVER, 0x02) \
+ _ (VIRTIO_CONFIG_STATUS_DRIVER_OK, 0x04) \
+ _ (VIRTIO_CONFIG_STATUS_FEATURES_OK, 0x08) \
+ _ (VIRTIO_CONFIG_STATUS_DEVICE_NEEDS_RESET, 0x40) \
+ _ (VIRTIO_CONFIG_STATUS_FAILED, 0x80)
+
+typedef enum
+{
+#define _(a, b) a = b,
+ foreach_virtio_config_status_flags
+#undef _
+} virtio_config_status_flags_t;
+
+#define foreach_virtio_net_feature_flags \
+ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \
+ _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \
+ _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \
+ _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \
+ _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. */ \
+ _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \
+ _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \
+ _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \
+ _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \
+ _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) /* Guest can announce device on the network */ \
+ _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \
+ _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \
+ _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \
+ _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \
+ _ (VIRTIO_F_ANY_LAYOUT, 27) /* Can the device handle any descriptor layout */ \
+ _ (VIRTIO_RING_F_INDIRECT_DESC, 28) /* Support indirect buffer descriptors */ \
+ _ (VIRTIO_RING_F_EVENT_IDX, 29) /* The Guest publishes the used index for which it expects an interrupt \
+ * at the end of the avail ring. Host should ignore the avail->flags field. */ \
+/* The Host publishes the avail index for which it expects a kick \
+ * at the end of the used ring. Guest should ignore the used->flags field. */ \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30)
+
+#define VIRTIO_NET_F_MTU 3
+#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
+#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG 1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG 3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG 4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG 5
+
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
+
+#define VIRTIO_PCI_VRING_ALIGN 4096
+/*
+ * MSI-X state of the device as recorded by the PCI capability scan.
+ */
+typedef enum
+{
+ VIRTIO_MSIX_NONE = 0, /* presumably: no MSI-X capability seen - TODO confirm */
+ VIRTIO_MSIX_DISABLED = 1, /* MSI-X capability present, PCI_MSIX_ENABLE flag clear */
+ VIRTIO_MSIX_ENABLED = 2 /* MSI-X capability present, PCI_MSIX_ENABLE flag set */
+} virtio_msix_status_t;
+
+/* This is the PCI capability header: */
+typedef struct
+{
+ u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
+ u8 cap_next; /* Generic PCI field: next ptr. */
+ u8 cap_len; /* Generic PCI field: capability length */
+ u8 cfg_type; /* Identifies the structure. */
+ u8 bar; /* Where to find it. */
+ u8 padding[3]; /* Pad to full dword. */
+ u32 offset; /* Offset within bar. */
+ u32 length; /* Length of the structure, in bytes. */
+} virtio_pci_cap_t;
+
+typedef struct
+{
+ struct virtio_pci_cap cap;
+ u32 notify_off_multiplier; /* Multiplier for queue_notify_off. */
+} virtio_pci_notify_cap_t;
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+typedef struct
+{
+ /* About the whole device. */
+ u32 device_feature_select; /* read-write */
+ u32 device_feature; /* read-only */
+ u32 guest_feature_select; /* read-write */
+ u32 guest_feature; /* read-write */
+ u16 msix_config; /* read-write */
+ u16 num_queues; /* read-only */
+ u8 device_status; /* read-write */
+ u8 config_generation; /* read-only */
+
+ /* About a specific virtqueue. */
+ u16 queue_select; /* read-write */
+ u16 queue_size; /* read-write, power of 2. */
+ u16 queue_msix_vector; /* read-write */
+ u16 queue_enable; /* read-write */
+ u16 queue_notify_off; /* read-only */
+ u32 queue_desc_lo; /* read-write */
+ u32 queue_desc_hi; /* read-write */
+ u32 queue_avail_lo; /* read-write */
+ u32 queue_avail_hi; /* read-write */
+ u32 queue_used_lo; /* read-write */
+ u32 queue_used_hi; /* read-write */
+} virtio_pci_common_cfg_t;
+
+typedef struct
+{
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 next;
+} vring_desc_t;
+/*
+ * Header of the "avail" ring; the ring entries follow it in memory.
+ */
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ u16 ring[0]; /* zero-length array: entries are laid out right after the header */
+ /* u16 used_event; */
+} vring_avail_t;
+
+typedef struct
+{
+ u32 id;
+ u32 len;
+} vring_used_elem_t;
+/*
+ * Header of the "used" ring; the vring_used_elem_t entries follow it.
+ */
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ vring_used_elem_t ring[0]; /* zero-length array: entries are laid out right after the header */
+ /* u16 avail_event; */
+} vring_used_t;
+/*
+ * Arguments and results of virtio_pci_create_if ().
+ */
+typedef struct
+{
+ u32 addr; /* in: PCI address, compared against pci_addr.as_u32 of existing interfaces */
+ u16 rxq_size; /* in: RX ring size, 0 selects VIRTIO_NUM_RX_DESC */
+ u16 txq_size; /* in: TX ring size, 0 selects VIRTIO_NUM_TX_DESC */
+ /* return */
+ i32 rv; /* out: VNET_API_ERROR_* code set on failure */
+ u32 sw_if_index; /* out: sw_if_index of the created interface */
+ u8 mac_addr_set; /* NOTE(review): not read by virtio_pci_create_if - confirm intended use */
+ u8 mac_addr[6]; /* NOTE(review): not read by virtio_pci_create_if - confirm intended use */
+ u64 features; /* in: requested feature bits, 0 means all features the driver supports */
+ clib_error_t *error; /* out: error describing the failure, if any */
+} virtio_pci_create_if_args_t;
+
+extern void debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif);
+extern void device_status (vlib_main_t * vm, virtio_if_t * vif);
+void virtio_pci_create_if (vlib_main_t * vm,
+ virtio_pci_create_if_args_t * args);
+int virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * ad);
+
+#endif /* __included_virtio_pci_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api
new file mode 100644
index 00000000000..cb672960afd
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio.api
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+/** \brief Initialize a new virtio pci interface with the given parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param pci_addr - pci address as unsigned 32bit integer:
+ 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function
+ @param use_random_mac - let the system generate a unique mac address
+ @param mac_address - mac addr to assign to the interface if use_random_mac is not set
+ @param tx_ring_sz - the number of entries of TX ring
+ @param rx_ring_sz - the number of entries of RX ring
+ @param features - the virtio features which driver should negotiate with device
+*/
+define virtio_pci_create
+{
+ u32 client_index;
+ u32 context;
+ u32 pci_addr;
+ u8 use_random_mac;
+ u8 mac_address[6];
+ u16 tx_ring_sz; /* optional, default is 256 entries, must be power of 2 */
+ u16 rx_ring_sz; /* optional, default is 256 entries, must be power of 2 */
+ u64 features;
+};
+
+/** \brief Reply for virtio pci create request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new virtio pci interface
+*/
+define virtio_pci_create_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Delete virtio pci interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index of existing virtio pci interface
+*/
+autoreply define virtio_pci_delete
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+/** \brief Dump virtio pci interfaces request */
+define sw_interface_virtio_pci_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for virtio pci interface dump request
+ @param sw_if_index - software index of virtio pci interface
+ @param pci_addr - pci address as unsigned 32bit integer:
+ 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function
+ @param mac_addr - native virtio device mac address
+ @param tx_ring_sz - the number of entries of TX ring
+ @param rx_ring_sz - the number of entries of RX ring
+ @param features - the virtio features which driver have negotiated with device
+*/
+define sw_interface_virtio_pci_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u32 pci_addr;
+ u8 mac_addr[6];
+ u16 tx_ring_sz;
+ u16 rx_ring_sz;
+ u64 features;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index 17de781921d..94f140dac97 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -26,11 +26,13 @@
#include <sys/eventfd.h>
#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/pci.h>
virtio_main_t virtio_main;
@@ -101,6 +103,20 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, sz, CLIB_CACHE_LINE_BYTES);
+ ASSERT (vring->indirect_buffers == 0);
+ vec_validate_aligned (vring->indirect_buffers, sz, CLIB_CACHE_LINE_BYTES);
+ if (idx % 2)
+ {
+ u32 n_alloc = 0;
+ do
+ {
+ if (n_alloc < sz)
+ n_alloc =
+ vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc,
+ sz - n_alloc);
+ }
+ while (n_alloc != sz);
+ }
vring->size = sz;
vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
@@ -136,7 +152,7 @@ error:
return err;
}
-static_always_inline void
+inline void
virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring)
{
u16 used = vring->desc_in_use;
@@ -171,10 +187,157 @@ virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
clib_mem_free (vring->desc);
if (vring->avail)
clib_mem_free (vring->avail);
+ if (vring->queue_id % 2)
+ {
+ vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size);
+ }
vec_free (vring->buffers);
+ vec_free (vring->indirect_buffers);
return 0;
}
+/*
+ * Select the virtio-net header size for the interface: the v1 header
+ * when either MRG_RXBUF or VERSION_1 has been negotiated, the basic
+ * header otherwise.
+ */
+inline void
+virtio_set_net_hdr_size (virtio_if_t * vif)
+{
+  const u64 v1_hdr_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)
+    | VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
+
+  vif->virtio_net_hdr_sz = (vif->features & v1_hdr_features)
+    ? sizeof (struct virtio_net_hdr_v1) : sizeof (struct virtio_net_hdr);
+}
+
+/*
+ * Print the state of virtio interfaces on the CLI.
+ *
+ * vm            - vlib main, used for CLI output
+ * hw_if_indices - vector of hw interface indices to show; nothing is
+ *                 printed when it is NULL
+ * show_descr    - when non-zero, also dump the descriptor table of every
+ *                 virtqueue
+ * type          - only interfaces whose vif->type equals this value are
+ *                 shown
+ */
+inline void
+virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
+{
+  u32 i, j, hw_if_index;
+  virtio_if_t *vif;
+  vnet_main_t *vnm = &vnet_main;
+  virtio_main_t *mm = &virtio_main;
+  virtio_vring_t *vring;
+  struct feat_struct
+  {
+    u8 bit;
+    char *str;
+  };
+  struct feat_struct *feat_entry;
+
+  /* name table for feature bits, terminated by a NULL name */
+  static struct feat_struct feat_array[] = {
+#define _(s,b) { .str = #s, .bit = b, },
+    foreach_virtio_net_features
+#undef _
+    {.str = NULL}
+  };
+
+  /* name table for interface flags, terminated by a NULL name */
+  struct feat_struct *flag_entry;
+  static struct feat_struct flags_array[] = {
+#define _(b,e,s) { .bit = b, .str = s, },
+    foreach_virtio_if_flag
+#undef _
+    {.str = NULL}
+  };
+
+  if (!hw_if_indices)
+    return;
+
+  for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++)
+    {
+      vnet_hw_interface_t *hi =
+	vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]);
+      vif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
+      if (vif->type != type)
+	continue;
+      vlib_cli_output (vm, "Interface: %U (ifindex %d)",
+		       format_vnet_hw_if_index_name, vnm,
+		       hw_if_indices[hw_if_index], vif->hw_if_index);
+      if (type == VIRTIO_IF_TYPE_PCI)
+	{
+	  vlib_cli_output (vm, " PCI Address: %U", format_vlib_pci_addr,
+			   &vif->pci_addr);
+	}
+      if (type == VIRTIO_IF_TYPE_TAP)
+	{
+	  if (vif->host_if_name)
+	    vlib_cli_output (vm, " name \"%s\"", vif->host_if_name);
+	  if (vif->net_ns)
+	    vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns);
+	  vlib_cli_output (vm, " fd %d", vif->fd);
+	  vlib_cli_output (vm, " tap-fd %d", vif->tap_fd);
+	}
+      vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address,
+		       vif->mac_addr);
+      vlib_cli_output (vm, " Device instance: %u", vif->dev_instance);
+      vlib_cli_output (vm, " flags 0x%x", vif->flags);
+      flag_entry = (struct feat_struct *) &flags_array;
+      while (flag_entry->str)
+	{
+	  if (vif->flags & (1ULL << flag_entry->bit))
+	    vlib_cli_output (vm, " %s (%d)", flag_entry->str,
+			     flag_entry->bit);
+	  flag_entry++;
+	}
+      if (type == VIRTIO_IF_TYPE_PCI)
+	{
+	  device_status (vm, vif);
+	}
+      vlib_cli_output (vm, " features 0x%lx", vif->features);
+      feat_entry = (struct feat_struct *) &feat_array;
+      while (feat_entry->str)
+	{
+	  if (vif->features & (1ULL << feat_entry->bit))
+	    vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+			     feat_entry->bit);
+	  feat_entry++;
+	}
+      vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features);
+      feat_entry = (struct feat_struct *) &feat_array;
+      while (feat_entry->str)
+	{
+	  if (vif->remote_features & (1ULL << feat_entry->bit))
+	    vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+			     feat_entry->bit);
+	  feat_entry++;
+	}
+      vec_foreach_index (i, vif->vrings)
+      {
+	// RX = 0, TX = 1
+	vring = vec_elt_at_index (vif->vrings, i);
+	vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX");
+	vlib_cli_output (vm,
+			 " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+			 vring->size, vring->last_used_idx, vring->desc_next,
+			 vring->desc_in_use);
+	vlib_cli_output (vm,
+			 " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d",
+			 vring->avail->flags, vring->avail->idx,
+			 vring->used->flags, vring->used->idx);
+	if (type == VIRTIO_IF_TYPE_TAP)
+	  {
+	    vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd,
+			     vring->call_fd);
+	  }
+	if (show_descr)
+	  {
+	    vlib_cli_output (vm, "\n descriptor table:\n");
+	    vlib_cli_output (vm,
+			     " id addr len flags next user_addr\n");
+	    vlib_cli_output (vm,
+			     " ===== ================== ===== ====== ===== ==================\n");
+	    /*
+	     * Dump the ring selected for this iteration; vring must not
+	     * be reset to the first ring here or every queue would show
+	     * the descriptors of queue 0.
+	     */
+	    for (j = 0; j < vring->size; j++)
+	      {
+		struct vring_desc *desc = &vring->desc[j];
+		vlib_cli_output (vm,
+				 " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
+				 j, desc->addr,
+				 desc->len,
+				 desc->flags, desc->next, desc->addr);
+	      }
+	  }
+      }
+    }
+
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 841441bd0c6..f4a8103a0ab 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -18,6 +18,11 @@
#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_H_
#define _VNET_DEVICES_VIRTIO_VIRTIO_H_
+#include <linux/virtio_config.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_ring.h>
+
#define foreach_virtio_net_features \
_ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \
_ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
@@ -53,6 +58,7 @@
_ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
_ (VIRTIO_F_VERSION_1, 32)
+
#define foreach_virtio_if_flag \
_(0, ADMIN_UP, "admin-up") \
_(1, DELETING, "deleting")
@@ -64,15 +70,31 @@ typedef enum
#undef _
} virtio_if_flag_t;
+#define VIRTIO_NUM_RX_DESC 256
+#define VIRTIO_NUM_TX_DESC 256
+
+#define VIRTIO_FEATURE(X) (1ULL << X)
+
typedef enum
{
VIRTIO_IF_TYPE_TAP,
+ VIRTIO_IF_TYPE_PCI,
VIRTIO_IF_N_TYPES,
} virtio_if_type_t;
typedef struct
{
+ u8 mac[6];
+ u16 status;
+ u16 max_virtqueue_pairs;
+ u16 mtu;
+} virtio_net_config_t;
+
+#define VIRTIO_RING_FLAG_MASK_INT 1
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
struct vring_desc *desc;
struct vring_used *used;
@@ -82,33 +104,62 @@ typedef struct
int kick_fd;
int call_fd;
u16 size;
-#define VIRTIO_RING_FLAG_MASK_INT 1
- u32 flags;
+ u16 queue_id;
+ u16 flags;
u32 call_file_index;
u32 *buffers;
+ u32 *indirect_buffers;
u16 last_used_idx;
u16 last_kick_avail_idx;
} virtio_vring_t;
+typedef union /* PCI address, usable field by field or as one 32-bit word */
+{
+ struct
+ {
+ u16 domain;
+ u8 bus;
+ u8 slot:5;
+ u8 function:3;
+ };
+ u32 as_u32; /* whole address; virtio_pci_create_if compares it with args->addr */
+} pci_addr_t;
+
typedef struct
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 flags;
clib_spinlock_t lockp;
- u32 id;
u32 dev_instance;
u32 hw_if_index;
u32 sw_if_index;
+ u16 virtio_net_hdr_sz;
+ virtio_if_type_t type;
+ union
+ {
+ u32 id;
+ pci_addr_t pci_addr;
+ };
u32 per_interface_next_index;
int fd;
- int tap_fd;
+ union
+ {
+ int tap_fd;
+ u32 pci_dev_handle;
+ };
virtio_vring_t *vrings;
u64 features, remote_features;
- virtio_if_type_t type;
+ /* error */
+ clib_error_t *error;
+ u16 max_queue_pairs;
u16 tx_ring_sz;
u16 rx_ring_sz;
+ u8 status;
+ u8 mac_addr[6];
+ u64 bar[2];
u8 *host_if_name;
u8 *net_ns;
u8 *host_bridge;
@@ -135,17 +186,27 @@ clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
clib_error_t *virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring);
-
+extern void virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring);
+extern void virtio_set_net_hdr_size (virtio_if_t * vif);
+extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
+ u32 type);
+extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm,
+ virtio_if_t * vif, u16 queue_id);
format_function_t format_virtio_device_name;
static_always_inline void
-virtio_kick (virtio_vring_t * vring)
+virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif)
{
- u64 x = 1;
- int __clib_unused r;
-
- r = write (vring->kick_fd, &x, sizeof (x));
- vring->last_kick_avail_idx = vring->avail->idx;
+ if (vif->type == VIRTIO_IF_TYPE_PCI)
+ virtio_pci_legacy_notify_queue (vm, vif, vring->queue_id);
+ else
+ {
+ u64 x = 1;
+ int __clib_unused r;
+
+ r = write (vring->kick_fd, &x, sizeof (x));
+ vring->last_kick_avail_idx = vring->avail->idx;
+ }
}
#endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */
diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c
new file mode 100644
index 00000000000..6f70b090034
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio_api.c
@@ -0,0 +1,237 @@
+/*
+ *------------------------------------------------------------------
+ * virtio_api.c - vnet virtio pci device driver API support
+ *
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/pci.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message endian-swap functions */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_virtio_pci_api_msg /* _(VL_API_ id suffix, message name) for each handled message */ \
+_(VIRTIO_PCI_CREATE, virtio_pci_create) \
+_(VIRTIO_PCI_DELETE, virtio_pci_delete) \
+_(SW_INTERFACE_VIRTIO_PCI_DUMP, sw_interface_virtio_pci_dump)
+
+/*
+ * API handler for VIRTIO_PCI_CREATE.
+ *
+ * Copies the request into a virtio_pci_create_if_args_t (converting the
+ * multi-byte fields to host byte order), calls virtio_pci_create_if () and,
+ * when the requesting client is still registered, sends a
+ * VIRTIO_PCI_CREATE_REPLY carrying the rv and sw_if_index found in the
+ * argument block after the call.
+ */
+static void
+vl_api_virtio_pci_create_t_handler (vl_api_virtio_pci_create_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_virtio_pci_create_reply_t *rmp;
+  vl_api_registration_t *reg;
+  virtio_pci_create_if_args_t _a, *ap = &_a;
+
+  clib_memset (ap, 0, sizeof (*ap));
+
+  ap->addr = ntohl (mp->pci_addr);
+  /* a caller-supplied MAC is only honoured when random MAC is not requested */
+  if (!mp->use_random_mac)
+    {
+      clib_memcpy (ap->mac_addr, mp->mac_address, 6);
+      ap->mac_addr_set = 1;
+    }
+  ap->rxq_size = ntohs (mp->rx_ring_sz);
+  ap->txq_size = ntohs (mp->tx_ring_sz);
+  /* preset to ~0; the reply reports whatever value is in ap after the call */
+  ap->sw_if_index = (u32) ~ 0;
+  ap->features = clib_net_to_host_u64 (mp->features);
+
+  virtio_pci_create_if (vm, ap);
+
+  /* the interface is created even if nobody is left to receive the reply */
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (!reg)
+    return;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  /* zero the reply first, as the other message builders in this file do */
+  clib_memset (rmp, 0, sizeof (*rmp));
+  rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_CREATE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = htonl (ap->rv);
+  rmp->sw_if_index = htonl (ap->sw_if_index);
+
+  vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+/*
+ * Build a SW_INTERFACE_EVENT for sw_if_index with the "deleted" flag set
+ * (admin and link state reported as down) and send it to reg.
+ * The am argument is not used.
+ */
+static void
+virtio_pci_send_sw_interface_event_deleted (vpe_api_main_t * am,
+                                            vl_api_registration_t * reg,
+                                            u32 sw_if_index)
+{
+  vl_api_sw_interface_event_t *event = vl_msg_api_alloc (sizeof (*event));
+
+  clib_memset (event, 0, sizeof (*event));
+
+  event->_vl_msg_id = htons (VL_API_SW_INTERFACE_EVENT);
+  event->sw_if_index = htonl (sw_if_index);
+
+  /* interface is gone: flag it deleted, with both states down */
+  event->deleted = 1;
+  event->link_up_down = 0;
+  event->admin_up_down = 0;
+
+  vl_api_send_msg (reg, (u8 *) event);
+}
+
+/*
+ * API handler for VIRTIO_PCI_DELETE.
+ *
+ * Looks up the hardware interface behind mp->sw_if_index, rejects it with
+ * VNET_API_ERROR_INVALID_INTERFACE when it is missing or not owned by
+ * virtio_device_class, and otherwise hands the matching virtio_if_t to
+ * virtio_pci_delete_if ().  The reply carries the resulting return value;
+ * on success a "deleted" interface event is also sent to the same client.
+ */
+static void
+vl_api_virtio_pci_delete_t_handler (vl_api_virtio_pci_delete_t * mp)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vlib_get_main ();
+  virtio_main_t *vmx = &virtio_main;
+  int rv = 0;
+  vnet_hw_interface_t *hw;
+  virtio_if_t *vif;
+  vpe_api_main_t *vam = &vpe_api_main;
+  vl_api_virtio_pci_delete_reply_t *rmp;
+  vl_api_registration_t *reg;
+  /* request fields arrive in network byte order; convert exactly once */
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
+    {
+      rv = VNET_API_ERROR_INVALID_INTERFACE;
+      goto reply;
+    }
+
+  vif = pool_elt_at_index (vmx->interfaces, hw->dev_instance);
+
+  rv = virtio_pci_delete_if (vm, vif);
+
+reply:
+  /* no reply (and no event) if the client has gone away */
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (!reg)
+    return;
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = htons (VL_API_VIRTIO_PCI_DELETE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = htonl (rv);
+
+  vl_api_send_msg (reg, (u8 *) rmp);
+
+  /* the deletion event follows the reply, and only on success */
+  if (!rv)
+    {
+      virtio_pci_send_sw_interface_event_deleted (vam, reg, sw_if_index);
+    }
+}
+
+/*
+ * Send one SW_INTERFACE_VIRTIO_PCI_DETAILS message describing vif to reg.
+ * Multi-byte fields are converted to network byte order; context is copied
+ * through unchanged.  The am argument is not used.
+ */
+static void
+virtio_pci_send_sw_interface_details (vpe_api_main_t * am,
+                                      vl_api_registration_t * reg,
+                                      virtio_if_t * vif, u32 context)
+{
+  vl_api_sw_interface_virtio_pci_details_t *details;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  clib_memset (details, 0, sizeof (*details));
+
+  /* message header */
+  details->_vl_msg_id = htons (VL_API_SW_INTERFACE_VIRTIO_PCI_DETAILS);
+  details->context = context;
+
+  /* interface identity */
+  details->sw_if_index = htonl (vif->sw_if_index);
+  details->pci_addr = htonl (vif->pci_addr.as_u32);
+  clib_memcpy (details->mac_addr, vif->mac_addr, 6);
+
+  /* ring sizes and feature bits */
+  details->rx_ring_sz = htons (vif->rx_ring_sz);
+  details->tx_ring_sz = htons (vif->tx_ring_sz);
+  details->features = clib_host_to_net_u64 (vif->features);
+
+  vl_api_send_msg (reg, (u8 *) details);
+}
+
+/*
+ * API handler for SW_INTERFACE_VIRTIO_PCI_DUMP.
+ *
+ * Walks the virtio interface pool and sends a details message to the
+ * requesting client for every interface whose type is VIRTIO_IF_TYPE_PCI.
+ * Nothing is sent when the client registration cannot be found.
+ */
+static void
+vl_api_sw_interface_virtio_pci_dump_t_handler
+  (vl_api_sw_interface_virtio_pci_dump_t * mp)
+{
+  virtio_main_t *vim = &virtio_main;
+  vpe_api_main_t *vam = &vpe_api_main;
+  virtio_if_t *vif;
+  vl_api_registration_t *reg =
+    vl_api_client_index_to_registration (mp->client_index);
+
+  if (reg)
+    {
+      /* *INDENT-OFF* */
+      pool_foreach (vif, vim->interfaces, ({
+        if (vif->type == VIRTIO_IF_TYPE_PCI)
+          {
+            virtio_pci_send_sw_interface_details (vam, reg, vif,
+                                                  mp->context);
+          }
+      }));
+      /* *INDENT-ON* */
+    }
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register a "<name>_<crc>" string together with its message id for every
+ * entry of foreach_vl_msg_name_crc_virtio.
+ */
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(msg_id, msg_name, crc) \
+  vl_msg_api_add_msg_name_crc (am, #msg_name "_" #crc, msg_id);
+  foreach_vl_msg_name_crc_virtio;
+#undef _
+}
+
+/*
+ * API init function: installs the handler, endian and print functions for
+ * every message in foreach_virtio_pci_api_msg, then fills in the
+ * (msg_name, crc, message-id) table.  Always returns 0; vm is not used.
+ */
+static clib_error_t *
+virtio_pci_api_hookup (vlib_main_t * vm)
+{
+#define _(ID, name)                                             \
+  vl_msg_api_set_handlers (VL_API_##ID, #name,                  \
+                           vl_api_##name##_t_handler,           \
+                           vl_noop_handler,                     \
+                           vl_api_##name##_t_endian,            \
+                           vl_api_##name##_t_print,             \
+                           sizeof (vl_api_##name##_t), 1);
+  foreach_virtio_pci_api_msg;
+#undef _
+
+  /* Set up the (msg_name, crc, message-id) table */
+  setup_message_id_table (&api_main);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (virtio_pci_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */