aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Mohsin Kazmi <sykazmi@cisco.com> 2019-04-02 11:45:08 +0000
committer Damjan Marion <dmarion@me.com> 2019-04-03 14:42:37 +0000
commit 09a3bc50b581c72693ff6270da20a68f5781a468 (patch)
tree ff87100fec6d34d0bb2301445dcd4c7024884cd7 /src
parent 692f9b1205be8e61c0782b0711ec2393f8203e3e (diff)
virtio: Add support for multiqueue
Change-Id: Id71ffa77e977651f219ac09d1feef334851209e1
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- src/vnet/devices/tap/tap.c 39
-rw-r--r-- src/vnet/devices/virtio/device.c 13
-rw-r--r-- src/vnet/devices/virtio/node.c 14
-rw-r--r-- src/vnet/devices/virtio/pci.c 347
-rw-r--r-- src/vnet/devices/virtio/virtio.c 140
-rw-r--r-- src/vnet/devices/virtio/virtio.h 22
-rw-r--r-- src/vnet/devices/virtio/virtio_api.c 6
7 files changed, 493 insertions, 88 deletions
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 2649f68713e..388cfa34c37 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -334,23 +334,25 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vhost_mem->regions[0].memory_size = (1ULL << 47) - 4096;
_IOCTL (vif->fd, VHOST_SET_MEM_TABLE, vhost_mem);
- if ((args->error = virtio_vring_init (vm, vif, 0, args->rx_ring_sz)))
+ if ((args->error =
+ virtio_vring_init (vm, vif, RX_QUEUE (0), args->rx_ring_sz)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
goto error;
}
+ vif->num_rxqs = 1;
- if ((args->error = virtio_vring_init (vm, vif, 1, args->tx_ring_sz)))
+ if ((args->error =
+ virtio_vring_init (vm, vif, TX_QUEUE (0), args->tx_ring_sz)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
goto error;
}
+ vif->num_txqs = 1;
if (!args->mac_addr_set)
ethernet_mac_address_generate (args->mac_addr);
- vif->rx_ring_sz = args->rx_ring_sz != 0 ? args->rx_ring_sz : 256;
- vif->tx_ring_sz = args->tx_ring_sz != 0 ? args->tx_ring_sz : 256;
clib_memcpy (vif->mac_addr, args->mac_addr, 6);
vif->host_if_name = args->host_if_name;
@@ -396,10 +398,12 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0,
VNET_HW_INTERFACE_RX_MODE_DEFAULT);
vif->per_interface_next_index = ~0;
- virtio_vring_set_numa_node (vm, vif, 0);
+ virtio_vring_set_numa_node (vm, vif, RX_QUEUE (0));
vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
+ vif->cxq_vring = NULL;
+
if (thm->n_vlib_mains > 1)
clib_spinlock_init (&vif->lockp);
goto done;
@@ -415,8 +419,12 @@ error:
close (vif->tap_fd);
if (vif->fd != -1)
close (vif->fd);
- vec_foreach_index (i, vif->vrings) virtio_vring_free (vm, vif, i);
- vec_free (vif->vrings);
+ vec_foreach_index (i, vif->rxq_vrings) virtio_vring_free_rx (vm, vif,
+ RX_QUEUE (i));
+ vec_foreach_index (i, vif->txq_vrings) virtio_vring_free_tx (vm, vif,
+ TX_QUEUE (i));
+ vec_free (vif->rxq_vrings);
+ vec_free (vif->txq_vrings);
clib_memset (vif, 0, sizeof (virtio_if_t));
pool_put (vim->interfaces, vif);
@@ -455,7 +463,7 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
/* bring down the interface */
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
vnet_sw_interface_set_flags (vnm, vif->sw_if_index, 0);
- vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0);
+ vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, RX_QUEUE (0));
ethernet_delete_interface (vnm, vif->hw_if_index);
vif->hw_if_index = ~0;
@@ -465,8 +473,12 @@ tap_delete_if (vlib_main_t * vm, u32 sw_if_index)
if (vif->fd != -1)
close (vif->fd);
- vec_foreach_index (i, vif->vrings) virtio_vring_free (vm, vif, i);
- vec_free (vif->vrings);
+ vec_foreach_index (i, vif->rxq_vrings) virtio_vring_free_rx (vm, vif,
+ RX_QUEUE (i));
+ vec_foreach_index (i, vif->txq_vrings) virtio_vring_free_tx (vm, vif,
+ TX_QUEUE (i));
+ vec_free (vif->rxq_vrings);
+ vec_free (vif->txq_vrings);
tm->tap_ids = clib_bitmap_set (tm->tap_ids, vif->id, 0);
clib_spinlock_free (&vif->lockp);
@@ -528,6 +540,7 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
vnet_main_t *vnm = vnet_get_main ();
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
+ virtio_vring_t *vring;
vnet_hw_interface_t *hi;
tap_interface_details_t *r_tapids = NULL;
tap_interface_details_t *tapid = NULL;
@@ -544,8 +557,10 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
clib_memcpy(tapid->dev_name, hi->name,
MIN (ARRAY_LEN (tapid->dev_name) - 1,
strlen ((const char *) hi->name)));
- tapid->rx_ring_sz = vif->rx_ring_sz;
- tapid->tx_ring_sz = vif->tx_ring_sz;
+ vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS(0));
+ tapid->rx_ring_sz = vring->size;
+ vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS(0));
+ tapid->tx_ring_sz = vring->size;
clib_memcpy(tapid->host_mac_addr, vif->host_mac_addr, 6);
if (vif->host_if_name)
{
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index dbf560688b8..04cf9ec49c8 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -245,15 +245,16 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, virtio_if_t * vif,
int do_gso)
{
- u8 qid = 0;
u16 n_left = frame->n_vectors;
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, (qid << 1) + 1);
+ virtio_vring_t *vring;
+ u16 qid = vm->thread_index % vif->num_txqs;
+ vring = vec_elt_at_index (vif->txq_vrings, qid);
u16 used, next, avail;
u16 sz = vring->size;
u16 mask = sz - 1;
u32 *buffers = vlib_frame_vector_args (frame);
- clib_spinlock_lock_if_init (&vif->lockp);
+ clib_spinlock_lock_if_init (&vring->lockp);
if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0 &&
(vring->last_kick_avail_idx != vring->avail->idx))
@@ -298,7 +299,7 @@ virtio_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_free (vm, buffers, n_left);
}
- clib_spinlock_unlock_if_init (&vif->lockp);
+ clib_spinlock_unlock_if_init (&vring->lockp);
return frame->n_vectors - n_left;
}
@@ -310,8 +311,8 @@ VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm,
virtio_main_t *nm = &virtio_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
-
vnet_main_t *vnm = vnet_get_main ();
+
if (vnm->interface_main.gso_interface_count > 0)
return virtio_interface_tx_inline (vm, node, frame, vif, 1 /* do_gso */ );
else
@@ -352,7 +353,7 @@ virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
virtio_main_t *mm = &virtio_main;
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, qid);
+ virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode))
{
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index b2f8d04861e..686d90c3d00 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -225,7 +225,7 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vm->thread_index;
uword n_trace = vlib_get_trace_count (vm, node);
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, 0);
+ virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
const int hdr_sz = vif->virtio_net_hdr_sz;
u32 *to_next = 0;
@@ -364,15 +364,15 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
foreach_device_and_queue (dq, rt->devices_and_queues)
{
- virtio_if_t *mif;
- mif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
- if (mif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
+ virtio_if_t *vif;
+ vif = vec_elt_at_index (nm->interfaces, dq->dev_instance);
+ if (vif->flags & VIRTIO_IF_FLAG_ADMIN_UP)
{
- if (mif->gso_enabled)
- n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ if (vif->gso_enabled)
+ n_rx += virtio_device_input_inline (vm, node, frame, vif,
dq->queue_id, 1);
else
- n_rx += virtio_device_input_inline (vm, node, frame, mif,
+ n_rx += virtio_device_input_inline (vm, node, frame, vif,
dq->queue_id, 0);
}
}
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index 020b088d346..dd2e1dad507 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -272,7 +272,7 @@ virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif)
static u32
virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif)
{
- if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC))
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC))
{
virtio_pci_legacy_read_config (vm, vif, vif->mac_addr,
sizeof (vif->mac_addr), 0);
@@ -288,7 +288,7 @@ virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif)
* Minimal driver: assumes link is up
*/
u16 status = 1;
- if (vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS))
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS))
virtio_pci_legacy_read_config (vm, vif, &status, sizeof (status), /* mac */
6);
return status;
@@ -444,6 +444,118 @@ debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif)
}
}
+struct virtio_ctrl_mq_status_hdr /* message copied into one buffer for the control vring */
+{
+ struct virtio_net_ctrl_hdr ctrl; /* request class and cmd */
+ struct virtio_net_ctrl_mq num_mqs; /* carries virtqueue_pairs */
+ virtio_net_ctrl_ack status; /* result; its descriptor is VRING_DESC_F_WRITE */
+};
+/* Send VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET on the control vring; returns status. */
+static int
+virtio_pci_enable_multiqueue (vlib_main_t * vm, virtio_if_t * vif,
+ u16 num_queues)
+{
+ virtio_main_t *vim = &virtio_main;
+ virtio_vring_t *vring = vif->cxq_vring; /* dereferenced unchecked below */
+ u32 buffer_index;
+ vlib_buffer_t *b;
+ u16 used, next, avail;
+ u16 sz = vring->size;
+ u16 mask = sz - 1; /* index wrap mask; presumes sz is a power of two */
+ struct virtio_ctrl_mq_status_hdr mq_hdr, result;
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+ /* build the request; its status field starts out as VIRTIO_NET_ERR */
+ mq_hdr.ctrl.class = VIRTIO_NET_CTRL_MQ;
+ mq_hdr.ctrl.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
+ mq_hdr.status = VIRTIO_NET_ERR;
+ mq_hdr.num_mqs.virtqueue_pairs = num_queues;
+ /* work on local copies of the ring indices, starting at desc_next */
+ used = vring->desc_in_use;
+ next = vring->desc_next;
+ avail = vring->avail->idx;
+ struct vring_desc *d = &vring->desc[next];
+ /* a single vlib buffer backs all three descriptors of the chain */
+ if (vlib_buffer_alloc (vm, &buffer_index, 1))
+ b = vlib_get_buffer (vm, buffer_index);
+ else
+ return VIRTIO_NET_ERR; /* vlib_buffer_alloc returned 0 buffers */
+ /*
+ * current_data may not be initialized with 0 and may contain
+ * previous offset.
+ */
+ b->current_data = 0;
+ clib_memcpy (vlib_buffer_get_current (b), &mq_hdr,
+ sizeof (struct virtio_ctrl_mq_status_hdr));
+ d->flags = VRING_DESC_F_NEXT; /* 1st descriptor: the ctrl header */
+ d->addr = vlib_buffer_get_current_pa (vm, b);
+ d->len = sizeof (struct virtio_net_ctrl_hdr);
+ vring->avail->ring[avail & mask] = next; /* chain head goes in the avail ring */
+ avail++;
+ next = (next + 1) & mask;
+ d->next = next; /* link to the following slot */
+ used++;
+ /* 2nd descriptor: the virtio_net_ctrl_mq payload inside the same buffer */
+ d = &vring->desc[next];
+ d->flags = VRING_DESC_F_NEXT;
+ d->addr = vlib_buffer_get_current_pa (vm, b) +
+ STRUCT_OFFSET_OF (struct virtio_ctrl_mq_status_hdr, num_mqs);
+ d->len = sizeof (struct virtio_net_ctrl_mq);
+ next = (next + 1) & mask;
+ d->next = next;
+ used++;
+ /* 3rd descriptor: the status field, flagged VRING_DESC_F_WRITE; ends chain */
+ d = &vring->desc[next];
+ d->flags = VRING_DESC_F_WRITE;
+ d->addr = vlib_buffer_get_current_pa (vm, b) +
+ STRUCT_OFFSET_OF (struct virtio_ctrl_mq_status_hdr, status);
+ d->len = sizeof (mq_hdr.status);
+ next = (next + 1) & mask;
+ used++;
+ /* barrier first, then publish avail->idx and store the ring bookkeeping */
+ CLIB_MEMORY_STORE_BARRIER ();
+ vring->avail->idx = avail;
+ vring->desc_next = next;
+ vring->desc_in_use = used;
+ /* kick only when used->flags does not carry VIRTIO_RING_FLAG_MASK_INT */
+ if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0)
+ {
+ virtio_kick (vm, vring, vif);
+ }
+ /* NOTE(review): used->idx is sampled once, no wait loop here - confirm */
+ clib_memset (&result, 0, sizeof (result));
+ u16 last = vring->last_used_idx, n_left = 0;
+ n_left = vring->used->idx - last;
+ /* for each used entry, walk its chain and decrement the in-use count */
+ while (n_left)
+ {
+ struct vring_used_elem *e = &vring->used->ring[last & mask];
+ u16 slot = e->id;
+
+ d = &vring->desc[slot];
+ while (d->flags & VRING_DESC_F_NEXT)
+ {
+ used--;
+ slot = d->next;
+ d = &vring->desc[slot];
+ }
+ used--; /* accounts for the last descriptor of the chain */
+ last++;
+ n_left--;
+ }
+ vring->desc_in_use = used;
+ vring->last_used_idx = last;
+ /* copy the whole message back out of the buffer, status field included */
+ CLIB_MEMORY_BARRIER ();
+ clib_memcpy (&result, vlib_buffer_get_current (b),
+ sizeof (struct virtio_ctrl_mq_status_hdr));
+
+ virtio_log_debug (vim, vif, "multi-queue enable status on Ctrl queue : %u",
+ result.status);
+ status = result.status;
+ vlib_buffer_free (vm, &buffer_index, 1);
+ return status;
+}
+
static u8
virtio_pci_queue_size_valid (u16 qsz)
{
@@ -455,16 +567,18 @@ virtio_pci_queue_size_valid (u16 qsz)
}
clib_error_t *
-virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx)
+virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif,
+ u16 queue_num)
{
clib_error_t *error = 0;
+ virtio_main_t *vim = &virtio_main;
u16 queue_size = 0;
virtio_vring_t *vring;
struct vring vr;
u32 i = 0;
- void *ptr;
+ void *ptr = NULL;
- queue_size = virtio_pci_legacy_get_queue_num (vm, vif, idx);
+ queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num);
if (!virtio_pci_queue_size_valid (queue_size))
clib_warning ("queue size is not valid");
@@ -477,18 +591,87 @@ virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx)
if (queue_size == 0)
queue_size = 256;
- vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES);
- vring = vec_elt_at_index (vif->vrings, idx);
+ vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->cxq_vring, 0);
+ i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
+ i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
+ ptr =
+ vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
+ if (!ptr)
+ return vlib_physmem_last_error (vm);
+ clib_memset (ptr, 0, i);
+ vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
+ vring->desc = vr.desc;
+ vring->avail = vr.avail;
+ vring->used = vr.used;
+ vring->queue_id = queue_num;
+ vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+
+ ASSERT (vring->buffers == 0);
+
+ vring->size = queue_size;
+ virtio_log_debug (vim, vif, "control-queue: number %u, size %u", queue_num,
+ queue_size);
+ virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr);
+ vring->kick_fd = -1;
+
+ return error;
+}
+
+clib_error_t *
+virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num)
+{
+ clib_error_t *error = 0;
+ virtio_main_t *vim = &virtio_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u16 queue_size = 0;
+ virtio_vring_t *vring;
+ struct vring vr;
+ u32 i = 0;
+ void *ptr = NULL;
+
+ queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num);
+ if (!virtio_pci_queue_size_valid (queue_size))
+ clib_warning ("queue size is not valid");
+
+ if (!is_pow2 (queue_size))
+ return clib_error_return (0, "ring size must be power of 2");
+
+ if (queue_size > 32768)
+ return clib_error_return (0, "ring size must be 32768 or lower");
+
+ if (queue_size == 0)
+ queue_size = 256;
+ if (queue_num % 2)
+ {
+ if (TX_QUEUE_ACCESS (queue_num) > vtm->n_vlib_mains)
+ return error;
+ vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
+ CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
+ clib_spinlock_init (&vring->lockp);
+ }
+ else
+ {
+ vec_validate_aligned (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num),
+ CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num));
+ }
i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
- ptr = vlib_physmem_alloc_aligned (vm, i, VIRTIO_PCI_VRING_ALIGN);
- memset (ptr, 0, i);
+ ptr =
+ vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
+ if (!ptr)
+ return vlib_physmem_last_error (vm);
+ clib_memset (ptr, 0, i);
vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
vring->desc = vr.desc;
vring->avail = vr.avail;
vring->used = vr.used;
- vring->queue_id = idx;
+ vring->queue_id = queue_num;
vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
ASSERT (vring->buffers == 0);
@@ -496,7 +679,7 @@ virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx)
ASSERT (vring->indirect_buffers == 0);
vec_validate_aligned (vring->indirect_buffers, queue_size,
CLIB_CACHE_LINE_BYTES);
- if (idx % 2)
+ if (queue_num % 2)
{
u32 n_alloc = 0;
do
@@ -507,13 +690,18 @@ virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx)
queue_size - n_alloc);
}
while (n_alloc != queue_size);
- vif->tx_ring_sz = queue_size;
+ vif->num_txqs++;
+ virtio_log_debug (vim, vif, "tx-queue: number %u, size %u", queue_num,
+ queue_size);
}
else
- vif->rx_ring_sz = queue_size;
+ {
+ vif->num_rxqs++;
+ virtio_log_debug (vim, vif, "rx-queue: number %u, size %u", queue_num,
+ queue_size);
+ }
vring->size = queue_size;
-
- virtio_pci_legacy_setup_queue (vm, vif, idx, ptr);
+ virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr);
vring->kick_fd = -1;
return error;
@@ -531,6 +719,8 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif,
| VIRTIO_FEATURE (VIRTIO_NET_F_MAC)
| VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)
| VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)
+ | VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)
+ | VIRTIO_FEATURE (VIRTIO_NET_F_MQ)
| VIRTIO_FEATURE (VIRTIO_F_NOTIFY_ON_EMPTY)
| VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)
| VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
@@ -542,8 +732,7 @@ virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif,
vif->features = req_features & vif->remote_features & supported_features;
- if (vif->
- remote_features & vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU))
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU))
{
virtio_net_config_t config;
virtio_pci_legacy_read_config (vm, vif, &config.mtu,
@@ -702,7 +891,7 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
u8 status = 0;
if ((error = virtio_pci_read_caps (vm, vif)))
- clib_error_return (error, "Device not supported");
+ clib_error_return (error, "Device is not supported");
if (virtio_pci_reset_device (vm, vif) < 0)
{
@@ -728,6 +917,9 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
}
vif->status = status;
+ /*
+ * get or set the mac address
+ */
if (virtio_pci_get_mac (vm, vif))
{
f64 now = vlib_time_now (vm);
@@ -743,15 +935,44 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_set_net_hdr_size (vif);
+ /*
+ * Initialize the virtqueues
+ */
if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif)))
goto error;
- if ((error = virtio_pci_vring_init (vm, vif, 0)))
- goto error;
+ for (int i = 0; i < vif->max_queue_pairs; i++)
+ {
+ if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i))))
+ virtio_log_warning (vim, vif, "%s (%u) %s", "error in rxq-queue",
+ RX_QUEUE (i), "initialization");
- if ((error = virtio_pci_vring_init (vm, vif, 1)))
- goto error;
+ if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i))))
+ virtio_log_warning (vim, vif, "%s (%u) %s", "error in txq-queue",
+ TX_QUEUE (i), "initialization");
+ }
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ {
+ if ((error =
+ virtio_pci_control_vring_init (vm, vif, vif->max_queue_pairs * 2)))
+ {
+ virtio_log_warning (vim, vif, "%s (%u) %s",
+ "error in control-queue",
+ vif->max_queue_pairs * 2, "initialization");
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ))
+ vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MQ);
+ }
+ }
+ else
+ {
+ virtio_log_debug (vim, vif, "control queue is not available");
+ vif->cxq_vring = NULL;
+ }
+
+ /*
+ * set the msix interrupts
+ */
if (vif->msix_enabled == VIRTIO_MSIX_ENABLED)
{
if (virtio_pci_legacy_set_config_irq (vm, vif, 1) ==
@@ -761,6 +982,10 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
VIRTIO_MSI_NO_VECTOR)
virtio_log_warning (vim, vif, "queue vector 0 is not set");
}
+
+ /*
+ * set the driver status OK
+ */
virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK);
vif->status = virtio_pci_legacy_get_status (vm, vif);
error:
@@ -831,6 +1056,7 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
}
vif->pci_dev_handle = h;
vlib_pci_set_private_data (vm, h, vif->dev_instance);
+ vif->numa_node = vlib_pci_get_numa_node (vm, h);
if ((error = vlib_pci_bus_master_enable (vm, h)))
{
@@ -931,11 +1157,15 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
vnet_hw_interface_set_input_node (vnm, vif->hw_if_index,
virtio_input_node.index);
- vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
- virtio_vring_set_numa_node (vm, vif, 0);
-
- vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0,
- VNET_HW_INTERFACE_RX_MODE_POLLING);
+ u32 i = 0;
+ vec_foreach_index (i, vif->rxq_vrings)
+ {
+ vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0);
+ virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i));
+ /* Set default rx mode to POLLING */
+ vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i,
+ VNET_HW_INTERFACE_RX_MODE_POLLING);
+ }
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
@@ -944,6 +1174,13 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
}
else
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
+
+ if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) &&
+ (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)))
+ {
+ if (virtio_pci_enable_multiqueue (vm, vif, vif->max_queue_pairs))
+ virtio_log_warning (vim, vif, "multiqueue is not set");
+ }
return;
error:
@@ -964,31 +1201,51 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_pci_intr_disable (vm, vif->pci_dev_handle);
- virtio_pci_legacy_del_queue (vm, vif, 0);
- virtio_pci_legacy_del_queue (vm, vif, 1);
+ for (i = 0; i < vif->max_queue_pairs; i++)
+ {
+ virtio_pci_legacy_del_queue (vm, vif, RX_QUEUE (i));
+ virtio_pci_legacy_del_queue (vm, vif, TX_QUEUE (i));
+ }
+
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ virtio_pci_legacy_del_queue (vm, vif, vif->max_queue_pairs * 2);
virtio_pci_legacy_reset (vm, vif);
if (vif->hw_if_index)
{
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
- vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, 0);
+ vec_foreach_index (i, vif->rxq_vrings)
+ {
+ vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i);
+ }
ethernet_delete_interface (vnm, vif->hw_if_index);
}
vlib_pci_device_close (vm, vif->pci_dev_handle);
- vec_foreach_index (i, vif->vrings)
+ vec_foreach_index (i, vif->rxq_vrings)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, i);
+ virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i);
if (vring->kick_fd != -1)
close (vring->kick_fd);
if (vring->used)
{
- if ((i & 1) == 1)
- virtio_free_used_desc (vm, vring);
- else
- virtio_free_rx_buffers (vm, vring);
+ virtio_free_rx_buffers (vm, vring);
+ }
+ vec_free (vring->buffers);
+ vec_free (vring->indirect_buffers);
+ vlib_physmem_free (vm, vring->desc);
+ }
+
+ vec_foreach_index (i, vif->txq_vrings)
+ {
+ virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
+ if (vring->kick_fd != -1)
+ close (vring->kick_fd);
+ if (vring->used)
+ {
+ virtio_free_used_desc (vm, vring);
}
if (vring->queue_id % 2)
{
@@ -999,7 +1256,23 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_physmem_free (vm, vring->desc);
}
- vec_free (vif->vrings);
+ if (vif->cxq_vring != NULL)
+ {
+ u16 last = vif->cxq_vring->last_used_idx;
+ u16 n_left = vif->cxq_vring->used->idx - last;
+ while (n_left)
+ {
+ last++;
+ n_left--;
+ }
+
+ vif->cxq_vring->last_used_idx = last;
+ vlib_physmem_free (vm, vif->cxq_vring->desc);
+ }
+
+ vec_free (vif->rxq_vrings);
+ vec_free (vif->txq_vrings);
+ vec_free (vif->cxq_vring);
if (vif->fd != -1)
vif->fd = -1;
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index 2648f29af84..72499b63731 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -81,9 +81,18 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
if (sz == 0)
sz = 256;
- vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES);
- vring = vec_elt_at_index (vif->vrings, idx);
-
+ if (idx % 2)
+ {
+ vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (idx),
+ CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
+ }
+ else
+ {
+ vec_validate_aligned (vif->rxq_vrings, RX_QUEUE_ACCESS (idx),
+ CLIB_CACHE_LINE_BYTES);
+ vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
+ }
i = sizeof (struct vring_desc) * sz;
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
@@ -101,6 +110,7 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->used, 0, i);
+ vring->queue_id = idx;
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, sz, CLIB_CACHE_LINE_BYTES);
ASSERT (vring->indirect_buffers == 0);
@@ -168,29 +178,47 @@ virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring)
}
clib_error_t *
-virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
+virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx);
+ virtio_vring_t *vring =
+ vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
clib_file_del_by_index (&file_main, vring->call_file_index);
close (vring->kick_fd);
close (vring->call_fd);
if (vring->used)
{
- if ((idx & 1) == 1)
- virtio_free_used_desc (vm, vring);
- else
- virtio_free_rx_buffers (vm, vring);
+ virtio_free_rx_buffers (vm, vring);
clib_mem_free (vring->used);
}
if (vring->desc)
clib_mem_free (vring->desc);
if (vring->avail)
clib_mem_free (vring->avail);
- if (vring->queue_id % 2)
+ vec_free (vring->buffers);
+ vec_free (vring->indirect_buffers);
+ return 0;
+}
+
+clib_error_t *
+virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
+{
+ virtio_vring_t *vring =
+ vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
+
+ clib_file_del_by_index (&file_main, vring->call_file_index);
+ close (vring->kick_fd);
+ close (vring->call_fd);
+ if (vring->used)
{
- vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size);
+ virtio_free_used_desc (vm, vring);
+ clib_mem_free (vring->used);
}
+ if (vring->desc)
+ clib_mem_free (vring->desc);
+ if (vring->avail)
+ clib_mem_free (vring->avail);
+ vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size);
vec_free (vring->buffers);
vec_free (vring->indirect_buffers);
return 0;
@@ -201,10 +229,11 @@ virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index;
- virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx);
+ virtio_vring_t *vring =
+ vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
thread_index =
vnet_get_device_input_thread_index (vnm, vif->hw_if_index,
- vring->queue_id);
+ RX_QUEUE_ACCESS (idx));
vring->buffer_pool_index =
vlib_buffer_pool_get_default_for_numa (vm,
vlib_mains
@@ -313,11 +342,15 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
feat_entry->bit);
feat_entry++;
}
- vec_foreach_index (i, vif->vrings)
+ vlib_cli_output (vm, " Number of RX Virtqueue %u", vif->num_rxqs);
+ vlib_cli_output (vm, " Number of TX Virtqueue %u", vif->num_txqs);
+ if (vif->cxq_vring != NULL
+ && vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ vlib_cli_output (vm, " Number of CTRL Virtqueue 1");
+ vec_foreach_index (i, vif->rxq_vrings)
{
- // RX = 0, TX = 1
- vring = vec_elt_at_index (vif->vrings, i);
- vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX");
+ vring = vec_elt_at_index (vif->rxq_vrings, i);
+ vlib_cli_output (vm, " Virtqueue (RX) %d", vring->queue_id);
vlib_cli_output (vm,
" qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
vring->size, vring->last_used_idx, vring->desc_next,
@@ -338,7 +371,6 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
" id addr len flags next user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ===== ==================\n");
- vring = vif->vrings;
for (j = 0; j < vring->size; j++)
{
struct vring_desc *desc = &vring->desc[j];
@@ -350,6 +382,78 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
}
}
+ vec_foreach_index (i, vif->txq_vrings)
+ {
+ vring = vec_elt_at_index (vif->txq_vrings, i);
+ vlib_cli_output (vm, " Virtqueue (TX) %d", vring->queue_id);
+ vlib_cli_output (vm,
+ " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
+ vlib_cli_output (vm,
+ " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d",
+ vring->avail->flags, vring->avail->idx,
+ vring->used->flags, vring->used->idx);
+ if (type == VIRTIO_IF_TYPE_TAP)
+ {
+ vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd,
+ vring->call_fd);
+ }
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " id addr len flags next user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== ==================\n");
+ for (j = 0; j < vring->size; j++)
+ {
+ struct vring_desc *desc = &vring->desc[j];
+ vlib_cli_output (vm,
+ " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
+ j, desc->addr,
+ desc->len,
+ desc->flags, desc->next, desc->addr);
+ }
+ }
+ }
+ if (vif->cxq_vring != NULL
+ && vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ {
+ vring = vif->cxq_vring;
+ vlib_cli_output (vm, " Virtqueue (CTRL) %d", vring->queue_id);
+ vlib_cli_output (vm,
+ " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->size, vring->last_used_idx,
+ vring->desc_next, vring->desc_in_use);
+ vlib_cli_output (vm,
+ " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d",
+ vring->avail->flags, vring->avail->idx,
+ vring->used->flags, vring->used->idx);
+ if (type == VIRTIO_IF_TYPE_TAP)
+ {
+ vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd,
+ vring->call_fd);
+ }
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " id addr len flags next user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== ==================\n");
+ for (j = 0; j < vring->size; j++)
+ {
+ struct vring_desc *desc = &vring->desc[j];
+ vlib_cli_output (vm,
+ " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
+ j, desc->addr,
+ desc->len,
+ desc->flags, desc->next, desc->addr);
+ }
+ }
+ }
+
}
}
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index f72819639d7..1de704386a8 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -75,6 +75,11 @@ typedef enum
#define VIRTIO_FEATURE(X) (1ULL << X)
+#define TX_QUEUE(X) (((X) * 2) + 1) /* index in txq_vrings -> TX queue number (odd) */
+#define RX_QUEUE(X) ((X) * 2) /* index in rxq_vrings -> RX queue number (even) */
+#define TX_QUEUE_ACCESS(X) ((X) / 2) /* TX queue number -> index in txq_vrings */
+#define RX_QUEUE_ACCESS(X) ((X) / 2) /* RX queue number -> index in rxq_vrings */
+
typedef enum
{
VIRTIO_IF_TYPE_TAP,
@@ -99,6 +104,7 @@ typedef struct
struct vring_desc *desc;
struct vring_used *used;
struct vring_avail *avail;
+ clib_spinlock_t lockp;
u16 desc_in_use;
u16 desc_next;
int kick_fd;
@@ -135,6 +141,7 @@ typedef struct
u32 dev_instance;
u32 hw_if_index;
u32 sw_if_index;
+ u32 numa_node;
u16 virtio_net_hdr_sz;
virtio_if_type_t type;
union
@@ -153,16 +160,16 @@ typedef struct
int tap_fd;
u32 pci_dev_handle;
};
- virtio_vring_t *vrings;
-
+ virtio_vring_t *rxq_vrings;
+ virtio_vring_t *txq_vrings;
u64 features, remote_features;
/* error */
clib_error_t *error;
u8 support_int_mode; /* support interrupt mode */
u16 max_queue_pairs;
- u16 tx_ring_sz;
- u16 rx_ring_sz;
+ u16 num_rxqs;
+ u16 num_txqs;
u8 status;
u8 mac_addr[6];
u8 *host_if_name;
@@ -175,6 +182,7 @@ typedef struct
u8 host_ip6_prefix_len;
int gso_enabled;
int ifindex;
+ virtio_vring_t *cxq_vring;
} virtio_if_t;
typedef struct
@@ -191,8 +199,10 @@ extern vlib_node_registration_t virtio_input_node;
clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
u16 sz);
-clib_error_t *virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif,
- u32 idx);
+clib_error_t *virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif,
+ u32 idx);
+clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
+ u32 idx);
void virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring);
diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c
index 5035799befe..238c6adfb43 100644
--- a/src/vnet/devices/virtio/virtio_api.c
+++ b/src/vnet/devices/virtio/virtio_api.c
@@ -159,8 +159,10 @@ virtio_pci_send_sw_interface_details (vpe_api_main_t * am,
mp->_vl_msg_id = htons (VL_API_SW_INTERFACE_VIRTIO_PCI_DETAILS);
mp->pci_addr = htonl (vif->pci_addr.as_u32);
mp->sw_if_index = htonl (vif->sw_if_index);
- mp->rx_ring_sz = htons (vif->rx_ring_sz);
- mp->tx_ring_sz = htons (vif->tx_ring_sz);
+ virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, 0);
+ mp->rx_ring_sz = htons (vring->size);
+ vring = vec_elt_at_index (vif->txq_vrings, 0);
+ mp->tx_ring_sz = htons (vring->size);
clib_memcpy (mp->mac_addr, vif->mac_addr, 6);
mp->features = clib_host_to_net_u64 (vif->features);