Diffstat (limited to 'drivers/net/virtio')
-rw-r--r--  drivers/net/virtio/Makefile                           7
-rw-r--r--  drivers/net/virtio/virtio_ethdev.c                  515
-rw-r--r--  drivers/net/virtio/virtio_ethdev.h                    4
-rw-r--r--  drivers/net/virtio/virtio_logs.h                      6
-rw-r--r--  drivers/net/virtio/virtio_pci.c                     120
-rw-r--r--  drivers/net/virtio/virtio_pci.h                       9
-rw-r--r--  drivers/net/virtio/virtio_ring.h                      2
-rw-r--r--  drivers/net/virtio/virtio_rxtx.c                    361
-rw-r--r--  drivers/net/virtio/virtio_rxtx.h                     56
-rw-r--r--  drivers/net/virtio/virtio_rxtx_simple.c              93
-rw-r--r--  drivers/net/virtio/virtio_user/vhost.h              146
-rw-r--r--  drivers/net/virtio/virtio_user/vhost_user.c         426
-rw-r--r--  drivers/net/virtio/virtio_user/virtio_user_dev.c    333
-rw-r--r--  drivers/net/virtio/virtio_user/virtio_user_dev.h     62
-rw-r--r--  drivers/net/virtio/virtio_user_ethdev.c             440
-rw-r--r--  drivers/net/virtio/virtqueue.h                       80
16 files changed, 2183 insertions(+), 477 deletions(-)
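Most of this change set splits the old all-in-one struct virtqueue into a bare ring (struct virtqueue) plus thin per-role wrappers -- virtnet_rx, virtnet_tx and virtnet_ctl -- that hold the role-specific state and the per-queue statistics. The exact definitions live in virtio_rxtx.h, whose hunks are not included in this excerpt; the sketch below is reconstructed only from how the fields are used in the hunks that follow, so the field order and any members not referenced here are assumptions.

    struct virtnet_stats {
        uint64_t packets;
        uint64_t bytes;
        uint64_t errors;
        uint64_t multicast;
        uint64_t broadcast;
        uint64_t size_bins[8];              /* packet-size histogram */
    };

    struct virtnet_rx {
        struct virtqueue *vq;               /* shared ring state */
        struct rte_mempool *mpool;          /* mbuf pool used for refill */
        struct rte_mbuf fake_mbuf;          /* sentinel for the vector path */
        uint16_t queue_id;
        uint8_t port_id;
        struct virtnet_stats stats;
        const struct rte_memzone *mz;       /* memzone backing the ring */
    };

    struct virtnet_tx {
        struct virtqueue *vq;
        const struct rte_memzone *virtio_net_hdr_mz; /* per-packet tx headers */
        phys_addr_t virtio_net_hdr_mem;
        uint16_t queue_id;
        uint8_t port_id;
        struct virtnet_stats stats;
        const struct rte_memzone *mz;
    };

    struct virtnet_ctl {
        struct virtqueue *vq;
        const struct rte_memzone *virtio_net_hdr_mz; /* cmd/data/status page */
        phys_addr_t virtio_net_hdr_mem;
        const struct rte_memzone *mz;
    };

Note the allocation scheme in virtio_dev_queue_setup(): the virtqueue and its wrapper come from a single rte_zmalloc_socket() block, with the wrapper placed at RTE_PTR_ADD(vq, sz_vq). That is why the queue-release paths below free only the virtqueue and the memzones, never the wrapper itself.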
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index ef84f604..3020b688 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -55,9 +55,16 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
endif
+ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c
+endif
+
# this lib depends upon:
DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_kvargs
include $(RTE_SDK)/mk/rte.lib.mk
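The new librte_kvargs dependency comes with the virtio-user vdev: virtio_user_ethdev.c (added by this series, its hunks are not included in this excerpt) parses the --vdev key=value arguments with the kvargs API. A minimal sketch of that pattern follows; the key names and the helper are illustrative assumptions rather than the driver's actual code.

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>
    #include <rte_common.h>
    #include <rte_kvargs.h>

    /* Hypothetical handler: duplicate the value of a string argument. */
    static int
    get_string_arg(const char *key __rte_unused, const char *value, void *extra_args)
    {
        if (value == NULL || extra_args == NULL)
            return -EINVAL;
        *(char **)extra_args = strdup(value);
        return 0;
    }

    static int
    parse_vdev_args(const char *args)
    {
        static const char * const valid[] = { "path", "queues", NULL };
        struct rte_kvargs *kvlist;
        char *path = NULL;

        kvlist = rte_kvargs_parse(args, valid);
        if (kvlist == NULL)
            return -1;
        if (rte_kvargs_count(kvlist, "path") == 1)
            rte_kvargs_process(kvlist, "path", get_string_arg, &path);
        rte_kvargs_free(kvlist);

        /* A real driver would keep "path"; the sketch just releases it. */
        free(path);
        return 0;
    }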
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 63a368ac..480daa37 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,8 +59,6 @@
#include "virtqueue.h"
#include "virtio_rxtx.h"
-
-static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
static int virtio_dev_configure(struct rte_eth_dev *dev);
static int virtio_dev_start(struct rte_eth_dev *dev);
@@ -80,7 +78,10 @@ static void virtio_get_hwaddr(struct virtio_hw *hw);
static void virtio_dev_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats);
static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
- struct rte_eth_xstats *xstats, unsigned n);
+ struct rte_eth_xstat *xstats, unsigned n);
+static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ unsigned limit);
static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
@@ -115,40 +116,61 @@ struct rte_virtio_xstats_name_off {
};
/* [rt]x_qX_ is prepended to the name string here */
-static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = {
- {"good_packets", offsetof(struct virtqueue, packets)},
- {"good_bytes", offsetof(struct virtqueue, bytes)},
- {"errors", offsetof(struct virtqueue, errors)},
- {"multicast_packets", offsetof(struct virtqueue, multicast)},
- {"broadcast_packets", offsetof(struct virtqueue, broadcast)},
- {"undersize_packets", offsetof(struct virtqueue, size_bins[0])},
- {"size_64_packets", offsetof(struct virtqueue, size_bins[1])},
- {"size_65_127_packets", offsetof(struct virtqueue, size_bins[2])},
- {"size_128_255_packets", offsetof(struct virtqueue, size_bins[3])},
- {"size_256_511_packets", offsetof(struct virtqueue, size_bins[4])},
- {"size_512_1023_packets", offsetof(struct virtqueue, size_bins[5])},
- {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])},
- {"size_1518_max_packets", offsetof(struct virtqueue, size_bins[7])},
+static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
+ {"good_packets", offsetof(struct virtnet_rx, stats.packets)},
+ {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
+ {"errors", offsetof(struct virtnet_rx, stats.errors)},
+ {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)},
+ {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)},
+ {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])},
+ {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])},
+ {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])},
+ {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])},
+ {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])},
+ {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])},
+ {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
+ {"size_1518_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])},
+};
+
+/* [rt]x_qX_ is prepended to the name string here */
+static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
+ {"good_packets", offsetof(struct virtnet_tx, stats.packets)},
+ {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
+ {"errors", offsetof(struct virtnet_tx, stats.errors)},
+ {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)},
+ {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)},
+ {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])},
+ {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])},
+ {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])},
+ {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])},
+ {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])},
+ {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])},
+ {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
+ {"size_1518_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])},
};
-#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \
- sizeof(rte_virtio_q_stat_strings[0]))
+#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
+ sizeof(rte_virtio_rxq_stat_strings[0]))
+#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
+ sizeof(rte_virtio_txq_stat_strings[0]))
static int
-virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
int *dlen, int pkt_num)
{
uint32_t head, i;
int k, sum = 0;
virtio_net_ctrl_ack status = ~0;
struct virtio_pmd_ctrl result;
+ struct virtqueue *vq;
ctrl->status = status;
- if (!(vq && vq->hw->cvq)) {
+ if (!cvq || !cvq->vq) {
PMD_INIT_LOG(ERR, "Control queue is not supported.");
return -1;
}
+ vq = cvq->vq;
head = vq->vq_desc_head_idx;
PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
@@ -158,7 +180,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
return -1;
- memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
+ memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
sizeof(struct virtio_pmd_ctrl));
/*
@@ -168,14 +190,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
* One RX packet for ACK.
*/
vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
- vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+ vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem;
vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
vq->vq_free_cnt--;
i = vq->vq_ring.desc[head].next;
for (k = 0; k < pkt_num; k++) {
vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
- vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+ vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
+ sizeof(struct virtio_net_ctrl_hdr)
+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
vq->vq_ring.desc[i].len = dlen[k];
@@ -185,7 +207,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
}
vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
- vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+ vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem
+ sizeof(struct virtio_net_ctrl_hdr);
vq->vq_ring.desc[i].len = sizeof(ctrl->status);
vq->vq_free_cnt--;
@@ -200,12 +222,12 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
virtqueue_notify(vq);
rte_rmb();
- while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+ while (VIRTQUEUE_NUSED(vq) == 0) {
rte_rmb();
usleep(100);
}
- while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+ while (VIRTQUEUE_NUSED(vq)) {
uint32_t idx, desc_idx, used_idx;
struct vring_used_elem *uep;
@@ -230,7 +252,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
vq->vq_free_cnt, vq->vq_desc_head_idx);
- memcpy(&result, vq->virtio_net_hdr_mz->addr,
+ memcpy(&result, cvq->virtio_net_hdr_mz->addr,
sizeof(struct virtio_pmd_ctrl));
return result.status;
@@ -261,12 +283,14 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
}
void
-virtio_dev_queue_release(struct virtqueue *vq) {
+virtio_dev_queue_release(struct virtqueue *vq)
+{
struct virtio_hw *hw;
if (vq) {
hw = vq->hw;
- hw->vtpci_ops->del_queue(hw, vq);
+ if (vq->configured)
+ hw->vtpci_ops->del_queue(hw, vq);
rte_free(vq->sw_ring);
rte_free(vq);
@@ -279,13 +303,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
uint16_t vtpci_queue_idx,
uint16_t nb_desc,
unsigned int socket_id,
- struct virtqueue **pvq)
+ void **pvq)
{
char vq_name[VIRTQUEUE_MAX_NAME_SZ];
- const struct rte_memzone *mz;
+ char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
+ const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
unsigned int vq_size, size;
struct virtio_hw *hw = dev->data->dev_private;
- struct virtqueue *vq = NULL;
+ struct virtnet_rx *rxvq = NULL;
+ struct virtnet_tx *txvq = NULL;
+ struct virtnet_ctl *cvq = NULL;
+ struct virtqueue *vq;
+ const char *queue_names[] = {"rvq", "txq", "cvq"};
+ size_t sz_vq, sz_q = 0, sz_hdr_mz = 0;
+ void *sw_ring = NULL;
+ int ret;
PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx);
@@ -305,39 +337,33 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
return -EINVAL;
}
+ snprintf(vq_name, sizeof(vq_name), "port%d_%s%d",
+ dev->data->port_id, queue_names[queue_type], queue_idx);
+
+ sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) +
+ vq_size * sizeof(struct vq_desc_extra),
+ RTE_CACHE_LINE_SIZE);
if (queue_type == VTNET_RQ) {
- snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d",
- dev->data->port_id, queue_idx);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
- vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
- vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
- (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
- sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id);
+ sz_q = sz_vq + sizeof(*rxvq);
} else if (queue_type == VTNET_TQ) {
- snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d",
- dev->data->port_id, queue_idx);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
- vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
+ sz_q = sz_vq + sizeof(*txvq);
+ /*
+ * For each xmit packet, allocate a virtio_net_hdr
+ * and indirect ring elements
+ */
+ sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
} else if (queue_type == VTNET_CQ) {
- snprintf(vq_name, sizeof(vq_name), "port%d_cvq",
- dev->data->port_id);
- vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
- vq_size * sizeof(struct vq_desc_extra),
- RTE_CACHE_LINE_SIZE);
+ sz_q = sz_vq + sizeof(*cvq);
+ /* Allocate a page for control vq command, data and status */
+ sz_hdr_mz = PAGE_SIZE;
}
+
+ vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id);
if (vq == NULL) {
- PMD_INIT_LOG(ERR, "Can not allocate virtqueue");
+ PMD_INIT_LOG(ERR, "can not allocate vq");
return -ENOMEM;
}
- if (queue_type == VTNET_RQ && vq->sw_ring == NULL) {
- PMD_INIT_LOG(ERR, "Can not allocate RX soft ring");
- rte_free(vq);
- return -ENOMEM;
- }
-
vq->hw = hw;
- vq->port_id = dev->data->port_id;
- vq->queue_id = queue_idx;
vq->vq_queue_index = vtpci_queue_idx;
vq->vq_nentries = vq_size;
@@ -350,64 +376,103 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
*/
size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
- PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size);
+ PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
+ size, vq->vq_ring_size);
- mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
- socket_id, 0, VIRTIO_PCI_VRING_ALIGN);
+ mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id,
+ 0, VIRTIO_PCI_VRING_ALIGN);
if (mz == NULL) {
if (rte_errno == EEXIST)
mz = rte_memzone_lookup(vq_name);
if (mz == NULL) {
- rte_free(vq);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_q_alloc;
}
}
- /*
- * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
- * and only accepts 32 bit page frame number.
- * Check if the allocated physical memory exceeds 16TB.
- */
- if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
- PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
- rte_free(vq);
- return -ENOMEM;
- }
-
memset(mz->addr, 0, sizeof(mz->len));
- vq->mz = mz;
+
vq->vq_ring_mem = mz->phys_addr;
vq->vq_ring_virt_mem = mz->addr;
- PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64, (uint64_t)mz->phys_addr);
- PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)(uintptr_t)mz->addr);
- vq->virtio_net_hdr_mz = NULL;
- vq->virtio_net_hdr_mem = 0;
-
- if (queue_type == VTNET_TQ) {
- const struct rte_memzone *hdr_mz;
- struct virtio_tx_region *txr;
- unsigned int i;
-
- /*
- * For each xmit packet, allocate a virtio_net_hdr
- * and indirect ring elements
- */
- snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
- dev->data->port_id, queue_idx);
- hdr_mz = rte_memzone_reserve_aligned(vq_name,
- vq_size * sizeof(*txr),
+ PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64,
+ (uint64_t)mz->phys_addr);
+ PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
+ (uint64_t)(uintptr_t)mz->addr);
+
+ if (sz_hdr_mz) {
+ snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr",
+ dev->data->port_id, queue_names[queue_type],
+ queue_idx);
+ hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
socket_id, 0,
RTE_CACHE_LINE_SIZE);
if (hdr_mz == NULL) {
if (rte_errno == EEXIST)
- hdr_mz = rte_memzone_lookup(vq_name);
+ hdr_mz = rte_memzone_lookup(vq_hdr_name);
if (hdr_mz == NULL) {
- rte_free(vq);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_q_alloc;
}
}
- vq->virtio_net_hdr_mz = hdr_mz;
- vq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+ }
+
+ if (queue_type == VTNET_RQ) {
+ size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
+ sizeof(vq->sw_ring[0]);
+
+ sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (!sw_ring) {
+ PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
+ ret = -ENOMEM;
+ goto fail_q_alloc;
+ }
+
+ vq->sw_ring = sw_ring;
+ rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq);
+ rxvq->vq = vq;
+ rxvq->port_id = dev->data->port_id;
+ rxvq->queue_id = queue_idx;
+ rxvq->mz = mz;
+ *pvq = rxvq;
+ } else if (queue_type == VTNET_TQ) {
+ txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq);
+ txvq->vq = vq;
+ txvq->port_id = dev->data->port_id;
+ txvq->queue_id = queue_idx;
+ txvq->mz = mz;
+ txvq->virtio_net_hdr_mz = hdr_mz;
+ txvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+
+ *pvq = txvq;
+ } else if (queue_type == VTNET_CQ) {
+ cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq);
+ cvq->vq = vq;
+ cvq->mz = mz;
+ cvq->virtio_net_hdr_mz = hdr_mz;
+ cvq->virtio_net_hdr_mem = hdr_mz->phys_addr;
+ memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
+ *pvq = cvq;
+ }
+
+ /* For the virtio-user case (that is, when dev->pci_dev is NULL), we use the
+ * virtual address, and we need to set _offset_ properly; see
+ * MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
+ */
+ if (dev->pci_dev)
+ vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
+ else {
+ vq->vq_ring_mem = (uintptr_t)mz->addr;
+ vq->offset = offsetof(struct rte_mbuf, buf_addr);
+ if (queue_type == VTNET_TQ)
+ txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
+ else if (queue_type == VTNET_CQ)
+ cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
+ }
+
+ if (queue_type == VTNET_TQ) {
+ struct virtio_tx_region *txr;
+ unsigned int i;
txr = hdr_mz->addr;
memset(txr, 0, vq_size * sizeof(*txr));
@@ -417,57 +482,50 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir));
/* first indirect descriptor is always the tx header */
- start_dp->addr = vq->virtio_net_hdr_mem
+ start_dp->addr = txvq->virtio_net_hdr_mem
+ i * sizeof(*txr)
+ offsetof(struct virtio_tx_region, tx_hdr);
- start_dp->len = vq->hw->vtnet_hdr_size;
+ start_dp->len = hw->vtnet_hdr_size;
start_dp->flags = VRING_DESC_F_NEXT;
}
-
- } else if (queue_type == VTNET_CQ) {
- /* Allocate a page for control vq command, data and status */
- snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
- dev->data->port_id);
- vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
- PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
- if (vq->virtio_net_hdr_mz == NULL) {
- if (rte_errno == EEXIST)
- vq->virtio_net_hdr_mz =
- rte_memzone_lookup(vq_name);
- if (vq->virtio_net_hdr_mz == NULL) {
- rte_free(vq);
- return -ENOMEM;
- }
- }
- vq->virtio_net_hdr_mem =
- vq->virtio_net_hdr_mz->phys_addr;
- memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
}
- hw->vtpci_ops->setup_queue(hw, vq);
+ if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+ PMD_INIT_LOG(ERR, "setup_queue failed");
+ virtio_dev_queue_release(vq);
+ return -EINVAL;
+ }
- *pvq = vq;
+ vq->configured = 1;
return 0;
+
+fail_q_alloc:
+ rte_free(sw_ring);
+ rte_memzone_free(hdr_mz);
+ rte_memzone_free(mz);
+ rte_free(vq);
+
+ return ret;
}
static int
virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx,
uint32_t socket_id)
{
- struct virtqueue *vq;
+ struct virtnet_ctl *cvq;
int ret;
struct virtio_hw *hw = dev->data->dev_private;
PMD_INIT_FUNC_TRACE();
ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX,
- vtpci_queue_idx, 0, socket_id, &vq);
+ vtpci_queue_idx, 0, socket_id, (void **)&cvq);
if (ret < 0) {
PMD_INIT_LOG(ERR, "control vq initialization failed");
return ret;
}
- hw->cvq = vq;
+ hw->cvq = cvq;
return 0;
}
@@ -491,7 +549,6 @@ static void
virtio_dev_close(struct rte_eth_dev *dev)
{
struct virtio_hw *hw = dev->data->dev_private;
- struct rte_pci_device *pci_dev = dev->pci_dev;
PMD_INIT_LOG(DEBUG, "virtio_dev_close");
@@ -499,7 +556,7 @@ virtio_dev_close(struct rte_eth_dev *dev)
virtio_dev_stop(dev);
/* reset the NIC */
- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
vtpci_reset(hw);
virtio_dev_free_mbufs(dev);
@@ -614,6 +671,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = {
.dev_infos_get = virtio_dev_info_get,
.stats_get = virtio_dev_stats_get,
.xstats_get = virtio_dev_xstats_get,
+ .xstats_get_names = virtio_dev_xstats_get_names,
.stats_reset = virtio_dev_stats_reset,
.xstats_reset = virtio_dev_stats_reset,
.link_update = virtio_dev_link_update,
@@ -675,83 +733,121 @@ virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
unsigned i;
for (i = 0; i < dev->data->nb_tx_queues; i++) {
- const struct virtqueue *txvq = dev->data->tx_queues[i];
+ const struct virtnet_tx *txvq = dev->data->tx_queues[i];
if (txvq == NULL)
continue;
- stats->opackets += txvq->packets;
- stats->obytes += txvq->bytes;
- stats->oerrors += txvq->errors;
+ stats->opackets += txvq->stats.packets;
+ stats->obytes += txvq->stats.bytes;
+ stats->oerrors += txvq->stats.errors;
if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
- stats->q_opackets[i] = txvq->packets;
- stats->q_obytes[i] = txvq->bytes;
+ stats->q_opackets[i] = txvq->stats.packets;
+ stats->q_obytes[i] = txvq->stats.bytes;
}
}
for (i = 0; i < dev->data->nb_rx_queues; i++) {
- const struct virtqueue *rxvq = dev->data->rx_queues[i];
+ const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
if (rxvq == NULL)
continue;
- stats->ipackets += rxvq->packets;
- stats->ibytes += rxvq->bytes;
- stats->ierrors += rxvq->errors;
+ stats->ipackets += rxvq->stats.packets;
+ stats->ibytes += rxvq->stats.bytes;
+ stats->ierrors += rxvq->stats.errors;
if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
- stats->q_ipackets[i] = rxvq->packets;
- stats->q_ibytes[i] = rxvq->bytes;
+ stats->q_ipackets[i] = rxvq->stats.packets;
+ stats->q_ibytes[i] = rxvq->stats.bytes;
}
}
stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
}
+static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ __rte_unused unsigned limit)
+{
+ unsigned i;
+ unsigned count = 0;
+ unsigned t;
+
+ unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
+ dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
+
+ if (xstats_names != NULL) {
+ /* Note: limit checked in rte_eth_xstats_get_names() */
+
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct virtqueue *rxvq = dev->data->rx_queues[i];
+ if (rxvq == NULL)
+ continue;
+ for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
+ snprintf(xstats_names[count].name,
+ sizeof(xstats_names[count].name),
+ "rx_q%u_%s", i,
+ rte_virtio_rxq_stat_strings[t].name);
+ count++;
+ }
+ }
+
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct virtqueue *txvq = dev->data->tx_queues[i];
+ if (txvq == NULL)
+ continue;
+ for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
+ snprintf(xstats_names[count].name,
+ sizeof(xstats_names[count].name),
+ "tx_q%u_%s", i,
+ rte_virtio_txq_stat_strings[t].name);
+ count++;
+ }
+ }
+ return count;
+ }
+ return nstats;
+}
+
static int
-virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats,
+virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
unsigned n)
{
unsigned i;
unsigned count = 0;
- unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_Q_XSTATS +
- dev->data->nb_rx_queues * VIRTIO_NB_Q_XSTATS;
+ unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
+ dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
if (n < nstats)
return nstats;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
- struct virtqueue *rxvq = dev->data->rx_queues[i];
+ struct virtnet_rx *rxvq = dev->data->rx_queues[i];
if (rxvq == NULL)
continue;
unsigned t;
- for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) {
- snprintf(xstats[count].name, sizeof(xstats[count].name),
- "rx_q%u_%s", i,
- rte_virtio_q_stat_strings[t].name);
+ for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
xstats[count].value = *(uint64_t *)(((char *)rxvq) +
- rte_virtio_q_stat_strings[t].offset);
+ rte_virtio_rxq_stat_strings[t].offset);
count++;
}
}
for (i = 0; i < dev->data->nb_tx_queues; i++) {
- struct virtqueue *txvq = dev->data->tx_queues[i];
+ struct virtnet_tx *txvq = dev->data->tx_queues[i];
if (txvq == NULL)
continue;
unsigned t;
- for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) {
- snprintf(xstats[count].name, sizeof(xstats[count].name),
- "tx_q%u_%s", i,
- rte_virtio_q_stat_strings[t].name);
+ for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
xstats[count].value = *(uint64_t *)(((char *)txvq) +
- rte_virtio_q_stat_strings[t].offset);
+ rte_virtio_txq_stat_strings[t].offset);
count++;
}
}
@@ -771,29 +867,31 @@ virtio_dev_stats_reset(struct rte_eth_dev *dev)
unsigned int i;
for (i = 0; i < dev->data->nb_tx_queues; i++) {
- struct virtqueue *txvq = dev->data->tx_queues[i];
+ struct virtnet_tx *txvq = dev->data->tx_queues[i];
if (txvq == NULL)
continue;
- txvq->packets = 0;
- txvq->bytes = 0;
- txvq->errors = 0;
- txvq->multicast = 0;
- txvq->broadcast = 0;
- memset(txvq->size_bins, 0, sizeof(txvq->size_bins[0]) * 8);
+ txvq->stats.packets = 0;
+ txvq->stats.bytes = 0;
+ txvq->stats.errors = 0;
+ txvq->stats.multicast = 0;
+ txvq->stats.broadcast = 0;
+ memset(txvq->stats.size_bins, 0,
+ sizeof(txvq->stats.size_bins[0]) * 8);
}
for (i = 0; i < dev->data->nb_rx_queues; i++) {
- struct virtqueue *rxvq = dev->data->rx_queues[i];
+ struct virtnet_rx *rxvq = dev->data->rx_queues[i];
if (rxvq == NULL)
continue;
- rxvq->packets = 0;
- rxvq->bytes = 0;
- rxvq->errors = 0;
- rxvq->multicast = 0;
- rxvq->broadcast = 0;
- memset(rxvq->size_bins, 0, sizeof(rxvq->size_bins[0]) * 8);
+ rxvq->stats.packets = 0;
+ rxvq->stats.bytes = 0;
+ rxvq->stats.errors = 0;
+ rxvq->stats.multicast = 0;
+ rxvq->stats.broadcast = 0;
+ memset(rxvq->stats.size_bins, 0,
+ sizeof(rxvq->stats.size_bins[0]) * 8);
}
}
@@ -827,7 +925,7 @@ virtio_mac_table_set(struct virtio_hw *hw,
int err, len[2];
if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
- PMD_DRV_LOG(INFO, "host does not support mac table\n");
+ PMD_DRV_LOG(INFO, "host does not support mac table");
return;
}
@@ -1027,16 +1125,17 @@ rx_func_get(struct rte_eth_dev *eth_dev)
* This function is based on probe() function in virtio_pci.c
* It returns 0 on success.
*/
-static int
+int
eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
{
struct virtio_hw *hw = eth_dev->data->dev_private;
struct virtio_net_config *config;
struct virtio_net_config local_config;
struct rte_pci_device *pci_dev;
+ uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
int ret;
- RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
+ RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
eth_dev->dev_ops = &virtio_eth_dev_ops;
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
@@ -1057,9 +1156,11 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
pci_dev = eth_dev->pci_dev;
- ret = vtpci_init(pci_dev, hw);
- if (ret)
- return ret;
+ if (pci_dev) {
+ ret = vtpci_init(pci_dev, hw, &dev_flags);
+ if (ret)
+ return ret;
+ }
/* Reset the device although not necessary at startup */
vtpci_reset(hw);
@@ -1074,9 +1175,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
/* If host does not support status then disable LSC */
if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
- pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+ dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
rte_eth_copy_pci_info(eth_dev, pci_dev);
+ eth_dev->data->dev_flags = dev_flags;
rx_func_get(eth_dev);
@@ -1150,12 +1252,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d",
hw->max_rx_queues, hw->max_tx_queues);
- PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+ if (pci_dev)
+ PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
eth_dev->data->port_id, pci_dev->id.vendor_id,
pci_dev->id.device_id);
/* Setup interrupt callback */
- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
rte_intr_callback_register(&pci_dev->intr_handle,
virtio_interrupt_handler, eth_dev);
@@ -1184,13 +1287,14 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
eth_dev->tx_pkt_burst = NULL;
eth_dev->rx_pkt_burst = NULL;
- virtio_dev_queue_release(hw->cvq);
+ if (hw->cvq)
+ virtio_dev_queue_release(hw->cvq->vq);
rte_free(eth_dev->data->mac_addrs);
eth_dev->data->mac_addrs = NULL;
/* reset interrupt callback */
- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
rte_intr_callback_unregister(&pci_dev->intr_handle,
virtio_interrupt_handler,
eth_dev);
@@ -1240,7 +1344,6 @@ virtio_dev_configure(struct rte_eth_dev *dev)
{
const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
struct virtio_hw *hw = dev->data->dev_private;
- struct rte_pci_device *pci_dev = dev->pci_dev;
PMD_INIT_LOG(DEBUG, "configure");
@@ -1258,7 +1361,7 @@ virtio_dev_configure(struct rte_eth_dev *dev)
return -ENOTSUP;
}
- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
PMD_DRV_LOG(ERR, "failed to set config vector");
return -EBUSY;
@@ -1273,11 +1376,12 @@ virtio_dev_start(struct rte_eth_dev *dev)
{
uint16_t nb_queues, i;
struct virtio_hw *hw = dev->data->dev_private;
- struct rte_pci_device *pci_dev = dev->pci_dev;
+ struct virtnet_rx *rxvq;
+ struct virtnet_tx *txvq __rte_unused;
/* check if lsc interrupt feature is enabled */
if (dev->data->dev_conf.intr_conf.lsc) {
- if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+ if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
PMD_DRV_LOG(ERR, "link status not supported by host");
return -ENOTSUP;
}
@@ -1313,16 +1417,22 @@ virtio_dev_start(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
- for (i = 0; i < nb_queues; i++)
- virtqueue_notify(dev->data->rx_queues[i]);
+ for (i = 0; i < nb_queues; i++) {
+ rxvq = dev->data->rx_queues[i];
+ virtqueue_notify(rxvq->vq);
+ }
PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
- for (i = 0; i < dev->data->nb_rx_queues; i++)
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ rxvq = dev->data->rx_queues[i];
+ VIRTQUEUE_DUMP(rxvq->vq);
+ }
- for (i = 0; i < dev->data->nb_tx_queues; i++)
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ txvq = dev->data->tx_queues[i];
+ VIRTQUEUE_DUMP(txvq->vq);
+ }
return 0;
}
@@ -1333,14 +1443,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
int i, mbuf_num = 0;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct virtnet_rx *rxvq = dev->data->rx_queues[i];
+
PMD_INIT_LOG(DEBUG,
"Before freeing rxq[%d] used and unused buf", i);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ VIRTQUEUE_DUMP(rxvq->vq);
- PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p",
- i, dev->data->rx_queues[i]);
- while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
- dev->data->rx_queues[i])) != NULL) {
+ PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq);
+ while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) {
rte_pktmbuf_free(buf);
mbuf_num++;
}
@@ -1348,27 +1458,27 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
PMD_INIT_LOG(DEBUG,
"After freeing rxq[%d] used and unused buf", i);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
+ VIRTQUEUE_DUMP(rxvq->vq);
}
for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct virtnet_tx *txvq = dev->data->tx_queues[i];
+
PMD_INIT_LOG(DEBUG,
"Before freeing txq[%d] used and unused bufs",
i);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ VIRTQUEUE_DUMP(txvq->vq);
mbuf_num = 0;
- while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
- dev->data->tx_queues[i])) != NULL) {
+ while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) {
rte_pktmbuf_free(buf);
-
mbuf_num++;
}
PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num);
PMD_INIT_LOG(DEBUG,
"After freeing txq[%d] used and unused buf", i);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ VIRTQUEUE_DUMP(txvq->vq);
}
}
@@ -1431,7 +1541,10 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
struct virtio_hw *hw = dev->data->dev_private;
- dev_info->driver_name = dev->driver->pci_drv.name;
+ if (dev->pci_dev)
+ dev_info->driver_name = dev->driver->pci_drv.name;
+ else
+ dev_info->driver_name = "virtio-user PMD";
dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
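The xstats rework above follows the ethdev API split of names and values into rte_eth_xstat_name and rte_eth_xstat. From an application's point of view the two driver callbacks are reached through two library calls; a hedged sketch of that usage is below (the exact port-id width and return conventions are recalled from this DPDK generation and should be treated as assumptions).

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <rte_ethdev.h>

    static void
    print_port_xstats(uint8_t port_id)
    {
        int i, len;
        struct rte_eth_xstat_name *names;
        struct rte_eth_xstat *values;

        /* Calling with NULL only reports how many entries exist. */
        len = rte_eth_xstats_get_names(port_id, NULL, 0);
        if (len <= 0)
            return;

        names = calloc(len, sizeof(*names));
        values = calloc(len, sizeof(*values));
        if (names != NULL && values != NULL &&
            rte_eth_xstats_get_names(port_id, names, len) == len &&
            rte_eth_xstats_get(port_id, values, len) == len) {
            for (i = 0; i < len; i++)
                printf("%s: %" PRIu64 "\n", names[i].name, values[i].value);
        }
        free(names);
        free(values);
    }

For the virtio PMD this yields one rx_qN_*/tx_qN_* entry per queue for each counter listed in the tables above.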
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 66423a07..2ecec6eb 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -81,7 +81,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
uint16_t vtpci_queue_idx,
uint16_t nb_desc,
unsigned int socket_id,
- struct virtqueue **pvq);
+ void **pvq);
void virtio_dev_queue_release(struct virtqueue *vq);
@@ -113,6 +113,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
+
/*
* The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
* frames larger than 1514 bytes. We do not yet support software LRO
diff --git a/drivers/net/virtio/virtio_logs.h b/drivers/net/virtio/virtio_logs.h
index d6c33f7b..90a79eaa 100644
--- a/drivers/net/virtio/virtio_logs.h
+++ b/drivers/net/virtio/virtio_logs.h
@@ -47,14 +47,14 @@
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX
#define PMD_RX_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s() rx: " fmt "\n", __func__, ## args)
#else
#define PMD_RX_LOG(level, fmt, args...) do { } while(0)
#endif
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX
#define PMD_TX_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s() tx: " fmt "\n", __func__, ## args)
#else
#define PMD_TX_LOG(level, fmt, args...) do { } while(0)
#endif
@@ -62,7 +62,7 @@
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER
#define PMD_DRV_LOG(level, fmt, args...) \
- RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args)
+ RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
#else
#define PMD_DRV_LOG(level, fmt, args...) do { } while(0)
#endif
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index c007959f..f1a7ca7e 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -55,20 +55,103 @@
*/
#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20)
+static inline int
+check_vq_phys_addr_ok(struct virtqueue *vq)
+{
+ /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+ * and only accepts 32 bit page frame number.
+ * Check if the allocated physical memory exceeds 16TB.
+ */
+ if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
+ (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+ PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Since we are in legacy mode:
+ * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf
+ *
+ * "Note that this is possible because while the virtio header is PCI (i.e.
+ * little) endian, the device-specific region is encoded in the native endian of
+ * the guest (where such distinction is applicable)."
+ *
+ * For powerpc, which supports both endiannesses, qemu assumes the CPU is big
+ * endian and enforces this for the virtio-net device.
+ */
static void
legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
void *dst, int length)
{
+#ifdef RTE_ARCH_PPC_64
+ int size;
+
+ while (length > 0) {
+ if (length >= 4) {
+ size = 4;
+ rte_eal_pci_ioport_read(&hw->io, dst, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
+ } else if (length >= 2) {
+ size = 2;
+ rte_eal_pci_ioport_read(&hw->io, dst, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
+ } else {
+ size = 1;
+ rte_eal_pci_ioport_read(&hw->io, dst, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ }
+
+ dst = (char *)dst + size;
+ offset += size;
+ length -= size;
+ }
+#else
rte_eal_pci_ioport_read(&hw->io, dst, length,
VIRTIO_PCI_CONFIG(hw) + offset);
+#endif
}
static void
legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
const void *src, int length)
{
+#ifdef RTE_ARCH_PPC_64
+ union {
+ uint32_t u32;
+ uint16_t u16;
+ } tmp;
+ int size;
+
+ while (length > 0) {
+ if (length >= 4) {
+ size = 4;
+ tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
+ rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ } else if (length >= 2) {
+ size = 2;
+ tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
+ rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ } else {
+ size = 1;
+ rte_eal_pci_ioport_write(&hw->io, src, size,
+ VIRTIO_PCI_CONFIG(hw) + offset);
+ }
+
+ src = (const char *)src + size;
+ offset += size;
+ length -= size;
+ }
+#else
rte_eal_pci_ioport_write(&hw->io, src, length,
VIRTIO_PCI_CONFIG(hw) + offset);
+#endif
}
static uint64_t
@@ -143,15 +226,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
return dst;
}
-static void
+static int
legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
uint32_t src;
+ if (!check_vq_phys_addr_ok(vq))
+ return -1;
+
rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
VIRTIO_PCI_QUEUE_SEL);
- src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+ src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+
+ return 0;
}
static void
@@ -179,7 +267,7 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc)
char dirname[PATH_MAX];
snprintf(dirname, sizeof(dirname),
- SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs",
+ "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(),
loc->domain, loc->bus, loc->devid, loc->function);
d = opendir(dirname);
@@ -199,15 +287,15 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
static int
legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
- struct virtio_hw *hw)
+ struct virtio_hw *hw, uint32_t *dev_flags)
{
if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0)
return -1;
if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
- pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+ *dev_flags |= RTE_ETH_DEV_INTR_LSC;
else
- pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+ *dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
return 0;
}
@@ -367,13 +455,16 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
return io_read16(&hw->common_cfg->queue_size);
}
-static void
+static int
modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
uint64_t desc_addr, avail_addr, used_addr;
uint16_t notify_off;
- desc_addr = vq->mz->phys_addr;
+ if (!check_vq_phys_addr_ok(vq))
+ return -1;
+
+ desc_addr = vq->vq_ring_mem;
avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
ring[vq->vq_nentries]),
@@ -400,6 +491,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
vq->notify_addr, notify_off);
+
+ return 0;
}
static void
@@ -626,11 +719,13 @@ next:
* Return -1:
* if there is error mapping with VFIO/UIO.
* if port map error when driver type is KDRV_NONE.
+ * if whitelisted but driver type is KDRV_UNKNOWN.
* Return 1 if kernel driver is managing the device.
* Return 0 on success.
*/
int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
+ uint32_t *dev_flags)
{
hw->dev = dev;
@@ -643,14 +738,15 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
PMD_INIT_LOG(INFO, "modern virtio pci detected.");
hw->vtpci_ops = &modern_ops;
hw->modern = 1;
- dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+ *dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
}
PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
- if (legacy_virtio_resource_init(dev, hw) < 0) {
+ if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) {
if (dev->kdrv == RTE_KDRV_UNKNOWN &&
- dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) {
+ (!dev->devargs ||
+ dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) {
PMD_INIT_LOG(INFO,
"skip kernel managed virtio device.");
return 1;
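The new check_vq_phys_addr_ok() helper centralizes the old inline sanity check: the legacy VIRTIO_PCI_QUEUE_PFN register is 32 bits wide and takes a page frame number. Assuming VIRTIO_PCI_QUEUE_ADDR_SHIFT is 12 (4 KiB frames), as defined in virtio_pci.h, the largest address the register can express is

    2^32 frames * 2^12 bytes/frame = 2^44 bytes = 16 TiB

which is why check_vq_phys_addr_ok() rejects any vring whose end lies above 16 TB; the patch applies the same check in both the legacy and the modern setup paths.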
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index b69785ea..dd7693fe 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -40,6 +40,7 @@
#include <rte_ethdev.h>
struct virtqueue;
+struct virtnet_ctl;
/* VirtIO PCI vendor/device ID. */
#define VIRTIO_PCI_VENDORID 0x1AF4
@@ -234,7 +235,7 @@ struct virtio_pci_ops {
uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec);
uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id);
- void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
+ int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq);
void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq);
};
@@ -242,7 +243,7 @@ struct virtio_pci_ops {
struct virtio_net_config;
struct virtio_hw {
- struct virtqueue *cvq;
+ struct virtnet_ctl *cvq;
struct rte_pci_ioport io;
uint64_t guest_features;
uint32_t max_tx_queues;
@@ -260,6 +261,7 @@ struct virtio_hw {
struct virtio_pci_common_cfg *common_cfg;
struct virtio_net_config *dev_cfg;
const struct virtio_pci_ops *vtpci_ops;
+ void *virtio_user_dev;
};
/*
@@ -293,7 +295,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
/*
* Function declaration from virtio_pci.c
*/
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+int vtpci_init(struct rte_pci_device *, struct virtio_hw *,
+ uint32_t *dev_flags);
void vtpci_reset(struct virtio_hw *);
void vtpci_reinit_complete(struct virtio_hw *);
diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h
index 447760a8..fcecc161 100644
--- a/drivers/net/virtio/virtio_ring.h
+++ b/drivers/net/virtio/virtio_ring.h
@@ -79,7 +79,7 @@ struct vring_used_elem {
struct vring_used {
uint16_t flags;
- uint16_t idx;
+ volatile uint16_t idx;
struct vring_used_elem ring[0];
};
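Making the used ring's idx volatile matters because the device updates that field asynchronously while the driver polls it; without the qualifier the compiler is free to hoist the load out of a busy-wait loop such as the new while (VIRTQUEUE_NUSED(vq) == 0) in virtio_send_command(). VIRTQUEUE_NUSED is defined in virtqueue.h (its hunks are not part of this excerpt); it is roughly:

    #define VIRTQUEUE_NUSED(vq) \
        ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))

The uint16_t cast keeps the difference correct when the 16-bit ring indices wrap around.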
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index ef21d8e3..a27208e3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -193,8 +193,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
start_dp = vq->vq_ring.desc;
start_dp[idx].addr =
- (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
- - hw->vtnet_hdr_size);
+ MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size;
start_dp[idx].len =
cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
start_dp[idx].flags = VRING_DESC_F_WRITE;
@@ -209,23 +208,24 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
}
static inline void
-virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
+virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
uint16_t needed, int use_indirect, int can_push)
{
struct vq_desc_extra *dxp;
+ struct virtqueue *vq = txvq->vq;
struct vring_desc *start_dp;
uint16_t seg_num = cookie->nb_segs;
uint16_t head_idx, idx;
- uint16_t head_size = txvq->hw->vtnet_hdr_size;
+ uint16_t head_size = vq->hw->vtnet_hdr_size;
unsigned long offs;
- head_idx = txvq->vq_desc_head_idx;
+ head_idx = vq->vq_desc_head_idx;
idx = head_idx;
- dxp = &txvq->vq_descx[idx];
+ dxp = &vq->vq_descx[idx];
dxp->cookie = (void *)cookie;
dxp->ndescs = needed;
- start_dp = txvq->vq_ring.desc;
+ start_dp = vq->vq_ring.desc;
if (can_push) {
/* put on zero'd transmit header (no offloads) */
@@ -259,46 +259,32 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie,
+ offsetof(struct virtio_tx_region, tx_hdr);
start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs;
- start_dp[idx].len = txvq->hw->vtnet_hdr_size;
+ start_dp[idx].len = vq->hw->vtnet_hdr_size;
start_dp[idx].flags = VRING_DESC_F_NEXT;
idx = start_dp[idx].next;
}
do {
- start_dp[idx].addr = rte_mbuf_data_dma_addr(cookie);
+ start_dp[idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset);
start_dp[idx].len = cookie->data_len;
start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
idx = start_dp[idx].next;
} while ((cookie = cookie->next) != NULL);
- start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
-
if (use_indirect)
- idx = txvq->vq_ring.desc[head_idx].next;
-
- txvq->vq_desc_head_idx = idx;
- if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
- txvq->vq_desc_tail_idx = idx;
- txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed);
- vq_update_avail_ring(txvq, head_idx);
-}
+ idx = vq->vq_ring.desc[head_idx].next;
-static inline struct rte_mbuf *
-rte_rxmbuf_alloc(struct rte_mempool *mp)
-{
- struct rte_mbuf *m;
-
- m = __rte_mbuf_raw_alloc(mp);
- __rte_mbuf_sanity_check_raw(m, 0);
-
- return m;
+ vq->vq_desc_head_idx = idx;
+ if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+ vq->vq_desc_tail_idx = idx;
+ vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
+ vq_update_avail_ring(vq, head_idx);
}
static void
-virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
+virtio_dev_vring_start(struct virtqueue *vq)
{
- struct rte_mbuf *m;
- int i, nbufs, error, size = vq->vq_nentries;
+ int size = vq->vq_nentries;
struct vring *vr = &vq->vq_ring;
uint8_t *ring_mem = vq->vq_ring_virt_mem;
@@ -322,30 +308,70 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
* Disable device(host) interrupting guest
*/
virtqueue_disable_intr(vq);
+}
+
+void
+virtio_dev_cq_start(struct rte_eth_dev *dev)
+{
+ struct virtio_hw *hw = dev->data->dev_private;
+
+ if (hw->cvq && hw->cvq->vq) {
+ virtio_dev_vring_start(hw->cvq->vq);
+ VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
+ }
+}
- /* Only rx virtqueue needs mbufs to be allocated at initialization */
- if (queue_type == VTNET_RQ) {
- if (vq->mpool == NULL)
+void
+virtio_dev_rxtx_start(struct rte_eth_dev *dev)
+{
+ /*
+ * Start receive and transmit vrings
+ * - Setup vring structure for all queues
+ * - Initialize descriptor for the rx vring
+ * - Allocate blank mbufs for the each rx descriptor
+ *
+ */
+ uint16_t i;
+ uint16_t desc_idx;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* Start rx vring. */
+ for (i = 0; i < dev->data->nb_rx_queues; i++) {
+ struct virtnet_rx *rxvq = dev->data->rx_queues[i];
+ struct virtqueue *vq = rxvq->vq;
+ int error, nbufs;
+ struct rte_mbuf *m;
+
+ virtio_dev_vring_start(vq);
+ if (rxvq->mpool == NULL) {
rte_exit(EXIT_FAILURE,
- "Cannot allocate initial mbufs for rx virtqueue");
+ "Cannot allocate mbufs for rx virtqueue");
+ }
/* Allocate blank mbufs for the each rx descriptor */
nbufs = 0;
error = ENOSPC;
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- if (use_simple_rxtx)
- for (i = 0; i < vq->vq_nentries; i++) {
- vq->vq_ring.avail->ring[i] = i;
- vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+ if (use_simple_rxtx) {
+ for (desc_idx = 0; desc_idx < vq->vq_nentries;
+ desc_idx++) {
+ vq->vq_ring.avail->ring[desc_idx] = desc_idx;
+ vq->vq_ring.desc[desc_idx].flags =
+ VRING_DESC_F_WRITE;
}
+ }
#endif
- memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf));
- for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++)
- vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf;
+ memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
+ for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
+ desc_idx++) {
+ vq->sw_ring[vq->vq_nentries + desc_idx] =
+ &rxvq->fake_mbuf;
+ }
while (!virtqueue_full(vq)) {
- m = rte_rxmbuf_alloc(vq->mpool);
+ m = rte_mbuf_raw_alloc(rxvq->mpool);
if (m == NULL)
break;
@@ -368,64 +394,40 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
vq_update_avail_idx(vq);
PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
- } else if (queue_type == VTNET_TQ) {
+
+ VIRTQUEUE_DUMP(vq);
+ }
+
+ /* Start tx vring. */
+ for (i = 0; i < dev->data->nb_tx_queues; i++) {
+ struct virtnet_tx *txvq = dev->data->tx_queues[i];
+ struct virtqueue *vq = txvq->vq;
+
+ virtio_dev_vring_start(vq);
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
if (use_simple_rxtx) {
- int mid_idx = vq->vq_nentries >> 1;
- for (i = 0; i < mid_idx; i++) {
- vq->vq_ring.avail->ring[i] = i + mid_idx;
- vq->vq_ring.desc[i + mid_idx].next = i;
- vq->vq_ring.desc[i + mid_idx].addr =
- vq->virtio_net_hdr_mem +
+ uint16_t mid_idx = vq->vq_nentries >> 1;
+
+ for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
+ vq->vq_ring.avail->ring[desc_idx] =
+ desc_idx + mid_idx;
+ vq->vq_ring.desc[desc_idx + mid_idx].next =
+ desc_idx;
+ vq->vq_ring.desc[desc_idx + mid_idx].addr =
+ txvq->virtio_net_hdr_mem +
offsetof(struct virtio_tx_region, tx_hdr);
- vq->vq_ring.desc[i + mid_idx].len =
+ vq->vq_ring.desc[desc_idx + mid_idx].len =
vq->hw->vtnet_hdr_size;
- vq->vq_ring.desc[i + mid_idx].flags =
+ vq->vq_ring.desc[desc_idx + mid_idx].flags =
VRING_DESC_F_NEXT;
- vq->vq_ring.desc[i].flags = 0;
+ vq->vq_ring.desc[desc_idx].flags = 0;
}
- for (i = mid_idx; i < vq->vq_nentries; i++)
- vq->vq_ring.avail->ring[i] = i;
+ for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
+ desc_idx++)
+ vq->vq_ring.avail->ring[desc_idx] = desc_idx;
}
#endif
- }
-}
-
-void
-virtio_dev_cq_start(struct rte_eth_dev *dev)
-{
- struct virtio_hw *hw = dev->data->dev_private;
-
- if (hw->cvq) {
- virtio_dev_vring_start(hw->cvq, VTNET_CQ);
- VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq);
- }
-}
-
-void
-virtio_dev_rxtx_start(struct rte_eth_dev *dev)
-{
- /*
- * Start receive and transmit vrings
- * - Setup vring structure for all queues
- * - Initialize descriptor for the rx vring
- * - Allocate blank mbufs for the each rx descriptor
- *
- */
- int i;
-
- PMD_INIT_FUNC_TRACE();
-
- /* Start rx vring. */
- for (i = 0; i < dev->data->nb_rx_queues; i++) {
- virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]);
- }
-
- /* Start tx vring. */
- for (i = 0; i < dev->data->nb_tx_queues; i++) {
- virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ);
- VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]);
+ VIRTQUEUE_DUMP(vq);
}
}
@@ -438,24 +440,24 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
struct rte_mempool *mp)
{
uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
- struct virtqueue *vq;
+ struct virtnet_rx *rxvq;
int ret;
PMD_INIT_FUNC_TRACE();
ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx,
- nb_desc, socket_id, &vq);
+ nb_desc, socket_id, (void **)&rxvq);
if (ret < 0) {
PMD_INIT_LOG(ERR, "rvq initialization failed");
return ret;
}
/* Create mempool for rx mbuf allocation */
- vq->mpool = mp;
+ rxvq->mpool = mp;
- dev->data->rx_queues[queue_idx] = vq;
+ dev->data->rx_queues[queue_idx] = rxvq;
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
- virtio_rxq_vec_setup(vq);
+ virtio_rxq_vec_setup(rxvq);
#endif
return 0;
@@ -464,7 +466,16 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
void
virtio_dev_rx_queue_release(void *rxq)
{
- virtio_dev_queue_release(rxq);
+ struct virtnet_rx *rxvq = rxq;
+ struct virtqueue *vq = rxvq->vq;
+ /* rxvq is freed when vq is freed, and mz must be freed after del_queue,
+ * so save the mz pointer first.
+ */
+ const struct rte_memzone *mz = rxvq->mz;
+
+ /* no need to free rxq as vq and rxq are allocated together */
+ virtio_dev_queue_release(vq);
+ rte_memzone_free(mz);
}
/*
@@ -486,6 +497,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
struct virtio_hw *hw = dev->data->dev_private;
#endif
+ struct virtnet_tx *txvq;
struct virtqueue *vq;
uint16_t tx_free_thresh;
int ret;
@@ -510,11 +522,12 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
#endif
ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx,
- nb_desc, socket_id, &vq);
+ nb_desc, socket_id, (void **)&txvq);
if (ret < 0) {
- PMD_INIT_LOG(ERR, "rvq initialization failed");
+ PMD_INIT_LOG(ERR, "tvq initialization failed");
return ret;
}
+ vq = txvq->vq;
tx_free_thresh = tx_conf->tx_free_thresh;
if (tx_free_thresh == 0)
@@ -532,14 +545,24 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
vq->vq_free_thresh = tx_free_thresh;
- dev->data->tx_queues[queue_idx] = vq;
+ dev->data->tx_queues[queue_idx] = txvq;
return 0;
}
void
virtio_dev_tx_queue_release(void *txq)
{
- virtio_dev_queue_release(txq);
+ struct virtnet_tx *txvq = txq;
+ struct virtqueue *vq = txvq->vq;
+ /* txvq is freed when vq is freed, and mz must be freed after del_queue,
+ * so save the mz pointer first.
+ */
+ const struct rte_memzone *hdr_mz = txvq->virtio_net_hdr_mz;
+ const struct rte_memzone *mz = txvq->mz;
+
+ virtio_dev_queue_release(vq);
+ rte_memzone_free(mz);
+ rte_memzone_free(hdr_mz);
}
static void
@@ -558,34 +581,34 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
}
static void
-virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf)
+virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
uint32_t s = mbuf->pkt_len;
struct ether_addr *ea;
if (s == 64) {
- vq->size_bins[1]++;
+ stats->size_bins[1]++;
} else if (s > 64 && s < 1024) {
uint32_t bin;
/* count zeros, and offset into correct bin */
bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
- vq->size_bins[bin]++;
+ stats->size_bins[bin]++;
} else {
if (s < 64)
- vq->size_bins[0]++;
+ stats->size_bins[0]++;
else if (s < 1519)
- vq->size_bins[6]++;
+ stats->size_bins[6]++;
else if (s >= 1519)
- vq->size_bins[7]++;
+ stats->size_bins[7]++;
}
ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
if (is_multicast_ether_addr(ea)) {
if (is_broadcast_ether_addr(ea))
- vq->broadcast++;
+ stats->broadcast++;
else
- vq->multicast++;
+ stats->multicast++;
}
}
@@ -594,7 +617,8 @@ virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf)
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
- struct virtqueue *rxvq = rx_queue;
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
struct virtio_hw *hw;
struct rte_mbuf *rxm, *new_mbuf;
uint16_t nb_used, num, nb_rx;
@@ -604,19 +628,19 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
uint32_t i, nb_enqueued;
uint32_t hdr_size;
- nb_used = VIRTQUEUE_NUSED(rxvq);
+ nb_used = VIRTQUEUE_NUSED(vq);
virtio_rmb();
num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
if (likely(num > DESC_PER_CACHELINE))
- num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
+ num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
- num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num);
+ num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
- hw = rxvq->hw;
+ hw = vq->hw;
nb_rx = 0;
nb_enqueued = 0;
hdr_size = hw->vtnet_hdr_size;
@@ -629,8 +653,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
PMD_RX_LOG(ERR, "Packet drop");
nb_enqueued++;
- virtio_discard_rxbuf(rxvq, rxm);
- rxvq->errors++;
+ virtio_discard_rxbuf(vq, rxm);
+ rxvq->stats.errors++;
continue;
}
@@ -651,23 +675,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rx_pkts[nb_rx++] = rxm;
- rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len;
- virtio_update_packet_stats(rxvq, rxm);
+ rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len;
+ virtio_update_packet_stats(&rxvq->stats, rxm);
}
- rxvq->packets += nb_rx;
+ rxvq->stats.packets += nb_rx;
/* Allocate new mbuf for the used descriptor */
error = ENOSPC;
- while (likely(!virtqueue_full(rxvq))) {
- new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+ while (likely(!virtqueue_full(vq))) {
+ new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
if (unlikely(new_mbuf == NULL)) {
struct rte_eth_dev *dev
= &rte_eth_devices[rxvq->port_id];
dev->data->rx_mbuf_alloc_failed++;
break;
}
- error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+ error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
if (unlikely(error)) {
rte_pktmbuf_free(new_mbuf);
break;
@@ -676,11 +700,11 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
}
if (likely(nb_enqueued)) {
- vq_update_avail_idx(rxvq);
+ vq_update_avail_idx(vq);
- if (unlikely(virtqueue_kick_prepare(rxvq))) {
- virtqueue_notify(rxvq);
- PMD_RX_LOG(DEBUG, "Notified\n");
+ if (unlikely(virtqueue_kick_prepare(vq))) {
+ virtqueue_notify(vq);
+ PMD_RX_LOG(DEBUG, "Notified");
}
}
@@ -692,7 +716,8 @@ virtio_recv_mergeable_pkts(void *rx_queue,
struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
{
- struct virtqueue *rxvq = rx_queue;
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
struct virtio_hw *hw;
struct rte_mbuf *rxm, *new_mbuf;
uint16_t nb_used, num, nb_rx;
@@ -706,13 +731,13 @@ virtio_recv_mergeable_pkts(void *rx_queue,
uint32_t seg_res;
uint32_t hdr_size;
- nb_used = VIRTQUEUE_NUSED(rxvq);
+ nb_used = VIRTQUEUE_NUSED(vq);
virtio_rmb();
- PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
+ PMD_RX_LOG(DEBUG, "used:%d", nb_used);
- hw = rxvq->hw;
+ hw = vq->hw;
nb_rx = 0;
i = 0;
nb_enqueued = 0;
@@ -727,22 +752,22 @@ virtio_recv_mergeable_pkts(void *rx_queue,
if (nb_rx == nb_pkts)
break;
- num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
+ num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
if (num != 1)
continue;
i++;
- PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
- PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
+ PMD_RX_LOG(DEBUG, "dequeue:%d", num);
+ PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
rxm = rcv_pkts[0];
if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
- PMD_RX_LOG(ERR, "Packet drop\n");
+ PMD_RX_LOG(ERR, "Packet drop");
nb_enqueued++;
- virtio_discard_rxbuf(rxvq, rxm);
- rxvq->errors++;
+ virtio_discard_rxbuf(vq, rxm);
+ rxvq->stats.errors++;
continue;
}
@@ -773,18 +798,18 @@ virtio_recv_mergeable_pkts(void *rx_queue,
*/
uint16_t rcv_cnt =
RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
- if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
+ if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
uint32_t rx_num =
- virtqueue_dequeue_burst_rx(rxvq,
+ virtqueue_dequeue_burst_rx(vq,
rcv_pkts, len, rcv_cnt);
i += rx_num;
rcv_cnt = rx_num;
} else {
PMD_RX_LOG(ERR,
- "No enough segments for packet.\n");
+ "No enough segments for packet.");
nb_enqueued++;
- virtio_discard_rxbuf(rxvq, rxm);
- rxvq->errors++;
+ virtio_discard_rxbuf(vq, rxm);
+ rxvq->stats.errors++;
break;
}
@@ -814,24 +839,24 @@ virtio_recv_mergeable_pkts(void *rx_queue,
VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
rx_pkts[nb_rx]->data_len);
- rxvq->bytes += rx_pkts[nb_rx]->pkt_len;
- virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]);
+ rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
+ virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
nb_rx++;
}
- rxvq->packets += nb_rx;
+ rxvq->stats.packets += nb_rx;
/* Allocate new mbuf for the used descriptor */
error = ENOSPC;
- while (likely(!virtqueue_full(rxvq))) {
- new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+ while (likely(!virtqueue_full(vq))) {
+ new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
if (unlikely(new_mbuf == NULL)) {
struct rte_eth_dev *dev
= &rte_eth_devices[rxvq->port_id];
dev->data->rx_mbuf_alloc_failed++;
break;
}
- error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+ error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
if (unlikely(error)) {
rte_pktmbuf_free(new_mbuf);
break;
@@ -840,10 +865,10 @@ virtio_recv_mergeable_pkts(void *rx_queue,
}
if (likely(nb_enqueued)) {
- vq_update_avail_idx(rxvq);
+ vq_update_avail_idx(vq);
- if (unlikely(virtqueue_kick_prepare(rxvq))) {
- virtqueue_notify(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq))) {
+ virtqueue_notify(vq);
PMD_RX_LOG(DEBUG, "Notified");
}
}
@@ -854,8 +879,9 @@ virtio_recv_mergeable_pkts(void *rx_queue,
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
- struct virtqueue *txvq = tx_queue;
- struct virtio_hw *hw = txvq->hw;
+ struct virtnet_tx *txvq = tx_queue;
+ struct virtqueue *vq = txvq->vq;
+ struct virtio_hw *hw = vq->hw;
uint16_t hdr_size = hw->vtnet_hdr_size;
uint16_t nb_used, nb_tx;
int error;
@@ -864,11 +890,11 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
return nb_pkts;
PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
- nb_used = VIRTQUEUE_NUSED(txvq);
+ nb_used = VIRTQUEUE_NUSED(vq);
virtio_rmb();
- if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh))
- virtio_xmit_cleanup(txvq, nb_used);
+ if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
+ virtio_xmit_cleanup(vq, nb_used);
for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
struct rte_mbuf *txm = tx_pkts[nb_tx];
@@ -886,6 +912,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* optimize ring usage */
if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) &&
rte_mbuf_refcnt_read(txm) == 1 &&
+ RTE_MBUF_DIRECT(txm) &&
txm->nb_segs == 1 &&
rte_pktmbuf_headroom(txm) >= hdr_size &&
rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
@@ -901,16 +928,16 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
* default => number of segments + 1
*/
slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
- need = slots - txvq->vq_free_cnt;
+ need = slots - vq->vq_free_cnt;
/* Positive value indicates it need free vring descriptors */
if (unlikely(need > 0)) {
- nb_used = VIRTQUEUE_NUSED(txvq);
+ nb_used = VIRTQUEUE_NUSED(vq);
virtio_rmb();
need = RTE_MIN(need, (int)nb_used);
- virtio_xmit_cleanup(txvq, need);
- need = slots - txvq->vq_free_cnt;
+ virtio_xmit_cleanup(vq, need);
+ need = slots - vq->vq_free_cnt;
if (unlikely(need > 0)) {
PMD_TX_LOG(ERR,
"No free tx descriptors to transmit");
@@ -921,17 +948,17 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Enqueue Packet buffers */
virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);
- txvq->bytes += txm->pkt_len;
- virtio_update_packet_stats(txvq, txm);
+ txvq->stats.bytes += txm->pkt_len;
+ virtio_update_packet_stats(&txvq->stats, txm);
}
- txvq->packets += nb_tx;
+ txvq->stats.packets += nb_tx;
if (likely(nb_tx)) {
- vq_update_avail_idx(txvq);
+ vq_update_avail_idx(vq);
- if (unlikely(virtqueue_kick_prepare(txvq))) {
- virtqueue_notify(txvq);
+ if (unlikely(virtqueue_kick_prepare(vq))) {
+ virtqueue_notify(vq);
PMD_TX_LOG(DEBUG, "Notified backend after xmit");
}
}
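
The descriptor budget in the loop above follows directly from the layout decision: an indirect chain always occupies a single slot, a packet whose virtio-net header can be pushed into the mbuf headroom needs exactly one slot per segment, and the default case needs one extra slot for the separate header. A few worked values of slots = use_indirect ? 1 : (txm->nb_segs + !can_push):

	indirect chain, any segment count:      slots = 1
	1 segment, header pushed (can_push):    slots = 1 + 0 = 1
	1 segment, separate header:             slots = 1 + 1 = 2
	3 segments, separate header:            slots = 3 + 1 = 4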
diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h
index a76c3e52..058b56a1 100644
--- a/drivers/net/virtio/virtio_rxtx.h
+++ b/drivers/net/virtio/virtio_rxtx.h
@@ -31,11 +31,65 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _VIRTIO_RXTX_H_
+#define _VIRTIO_RXTX_H_
+
#define RTE_PMD_VIRTIO_RX_MAX_BURST 64
+struct virtnet_stats {
+ uint64_t packets;
+ uint64_t bytes;
+ uint64_t errors;
+ uint64_t multicast;
+ uint64_t broadcast;
+ /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+ uint64_t size_bins[8];
+};
+
+struct virtnet_rx {
+ struct virtqueue *vq;
+ /* dummy mbuf, for wraparound when processing RX ring. */
+ struct rte_mbuf fake_mbuf;
+ uint64_t mbuf_initializer; /**< value to init mbufs. */
+ struct rte_mempool *mpool; /**< mempool for mbuf allocation */
+
+ uint16_t queue_id; /**< DPDK queue index. */
+ uint8_t port_id; /**< Device port identifier. */
+
+ /* Statistics */
+ struct virtnet_stats stats;
+
+ const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+};
+
+struct virtnet_tx {
+ struct virtqueue *vq;
+ /**< memzone to populate hdr. */
+ const struct rte_memzone *virtio_net_hdr_mz;
+ phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+ uint16_t queue_id; /**< DPDK queue index. */
+ uint8_t port_id; /**< Device port identifier. */
+
+ /* Statistics */
+ struct virtnet_stats stats;
+
+ const struct rte_memzone *mz; /**< mem zone to populate TX ring. */
+};
+
+struct virtnet_ctl {
+ struct virtqueue *vq;
+ /**< memzone to populate hdr. */
+ const struct rte_memzone *virtio_net_hdr_mz;
+ phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+ uint8_t port_id; /**< Device port identifier. */
+ const struct rte_memzone *mz; /**< mem zone to populate CTL ring. */
+};
+
#ifdef RTE_MACHINE_CPUFLAG_SSSE3
-int virtio_rxq_vec_setup(struct virtqueue *rxq);
+int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
struct rte_mbuf *m);
#endif
+#endif /* _VIRTIO_RXTX_H_ */
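
The size_bins comment above refers to the RFC 2819 packet-size buckets; the helper that fills them (virtio_update_packet_stats(), in virtio_rxtx.c) is not part of this hunk, so the sketch below only illustrates the assumed bucket boundaries rather than the PMD's exact code:

	#include <stdint.h>
	#include "virtio_rxtx.h"

	/* A sketch, assuming the conventional RFC 2819 boundaries; the exact
	 * thresholds used by the PMD live in virtio_rxtx.c, outside this hunk. */
	static inline void
	virtnet_stats_size_bin(struct virtnet_stats *st, uint32_t pkt_len)
	{
		if (pkt_len < 64)
			st->size_bins[0]++;        /* undersized */
		else if (pkt_len == 64)
			st->size_bins[1]++;        /* exactly 64 bytes */
		else if (pkt_len < 128)
			st->size_bins[2]++;        /* 65-127 */
		else if (pkt_len < 256)
			st->size_bins[3]++;        /* 128-255 */
		else if (pkt_len < 512)
			st->size_bins[4]++;        /* 256-511 */
		else if (pkt_len < 1024)
			st->size_bins[5]++;        /* 512-1023 */
		else if (pkt_len < 1519)
			st->size_bins[6]++;        /* 1024-1518 */
		else
			st->size_bins[7]++;        /* oversized */
	}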
diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c
index 8f5293dd..242ad90d 100644
--- a/drivers/net/virtio/virtio_rxtx_simple.c
+++ b/drivers/net/virtio/virtio_rxtx_simple.c
@@ -80,8 +80,8 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
vq->sw_ring[desc_idx] = cookie;
start_dp = vq->vq_ring.desc;
- start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr +
- RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size);
+ start_dp[desc_idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset) -
+ vq->hw->vtnet_hdr_size;
start_dp[desc_idx].len = cookie->buf_len -
RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
@@ -92,17 +92,18 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq,
}
static inline void
-virtio_rxq_rearm_vec(struct virtqueue *rxvq)
+virtio_rxq_rearm_vec(struct virtnet_rx *rxvq)
{
int i;
uint16_t desc_idx;
struct rte_mbuf **sw_ring;
struct vring_desc *start_dp;
int ret;
+ struct virtqueue *vq = rxvq->vq;
- desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1);
- sw_ring = &rxvq->sw_ring[desc_idx];
- start_dp = &rxvq->vq_ring.desc[desc_idx];
+ desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1);
+ sw_ring = &vq->sw_ring[desc_idx];
+ start_dp = &vq->vq_ring.desc[desc_idx];
ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring,
RTE_VIRTIO_VPMD_RX_REARM_THRESH);
@@ -119,15 +120,15 @@ virtio_rxq_rearm_vec(struct virtqueue *rxvq)
*(uint64_t *)p = rxvq->mbuf_initializer;
start_dp[i].addr =
- (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr +
- RTE_PKTMBUF_HEADROOM - rxvq->hw->vtnet_hdr_size);
+ MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) -
+ vq->hw->vtnet_hdr_size;
start_dp[i].len = sw_ring[i]->buf_len -
- RTE_PKTMBUF_HEADROOM + rxvq->hw->vtnet_hdr_size;
+ RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size;
}
- rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
- vq_update_avail_idx(rxvq);
+ vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH;
+ vq_update_avail_idx(vq);
}
/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP)
@@ -143,7 +144,8 @@ uint16_t
virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
{
- struct virtqueue *rxvq = rx_queue;
+ struct virtnet_rx *rxvq = rx_queue;
+ struct virtqueue *vq = rxvq->vq;
uint16_t nb_used;
uint16_t desc_idx;
struct vring_used_elem *rused;
@@ -175,15 +177,14 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
len_adjust = _mm_set_epi16(
0, 0,
0,
- (uint16_t)-rxvq->hw->vtnet_hdr_size,
- 0, (uint16_t)-rxvq->hw->vtnet_hdr_size,
+ (uint16_t)-vq->hw->vtnet_hdr_size,
+ 0, (uint16_t)-vq->hw->vtnet_hdr_size,
0, 0);
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;
- nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx -
- rxvq->vq_used_cons_idx;
+ nb_used = VIRTQUEUE_NUSED(vq);
rte_compiler_barrier();
@@ -193,17 +194,17 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
nb_used = RTE_MIN(nb_used, nb_pkts);
- desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1));
- rused = &rxvq->vq_ring.used->ring[desc_idx];
- sw_ring = &rxvq->sw_ring[desc_idx];
- sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries];
+ desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+ rused = &vq->vq_ring.used->ring[desc_idx];
+ sw_ring = &vq->sw_ring[desc_idx];
+ sw_ring_end = &vq->sw_ring[vq->vq_nentries];
_mm_prefetch((const void *)rused, _MM_HINT_T0);
- if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
virtio_rxq_rearm_vec(rxvq);
- if (unlikely(virtqueue_kick_prepare(rxvq)))
- virtqueue_notify(rxvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
}
for (nb_pkts_received = 0;
@@ -286,9 +287,9 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
}
}
- rxvq->vq_used_cons_idx += nb_pkts_received;
- rxvq->vq_free_cnt += nb_pkts_received;
- rxvq->packets += nb_pkts_received;
+ vq->vq_used_cons_idx += nb_pkts_received;
+ vq->vq_free_cnt += nb_pkts_received;
+ rxvq->stats.packets += nb_pkts_received;
return nb_pkts_received;
}
@@ -342,31 +343,32 @@ uint16_t
virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
{
- struct virtqueue *txvq = tx_queue;
+ struct virtnet_tx *txvq = tx_queue;
+ struct virtqueue *vq = txvq->vq;
uint16_t nb_used;
uint16_t desc_idx;
struct vring_desc *start_dp;
uint16_t nb_tail, nb_commit;
int i;
- uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1;
+ uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1;
- nb_used = VIRTQUEUE_NUSED(txvq);
+ nb_used = VIRTQUEUE_NUSED(vq);
rte_compiler_barrier();
if (nb_used >= VIRTIO_TX_FREE_THRESH)
- virtio_xmit_cleanup(tx_queue);
+ virtio_xmit_cleanup(vq);
- nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts);
- desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max);
- start_dp = txvq->vq_ring.desc;
+ nb_commit = nb_pkts = RTE_MIN((vq->vq_free_cnt >> 1), nb_pkts);
+ desc_idx = (uint16_t)(vq->vq_avail_idx & desc_idx_max);
+ start_dp = vq->vq_ring.desc;
nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx);
if (nb_commit >= nb_tail) {
for (i = 0; i < nb_tail; i++)
- txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+ vq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
for (i = 0; i < nb_tail; i++) {
start_dp[desc_idx].addr =
- rte_mbuf_data_dma_addr(*tx_pkts);
+ MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset);
start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
tx_pkts++;
desc_idx++;
@@ -375,9 +377,10 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
desc_idx = 0;
}
for (i = 0; i < nb_commit; i++)
- txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
+ vq->vq_descx[desc_idx + i].cookie = tx_pkts[i];
for (i = 0; i < nb_commit; i++) {
- start_dp[desc_idx].addr = rte_mbuf_data_dma_addr(*tx_pkts);
+ start_dp[desc_idx].addr =
+ MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset);
start_dp[desc_idx].len = (*tx_pkts)->pkt_len;
tx_pkts++;
desc_idx++;
@@ -385,21 +388,21 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
rte_compiler_barrier();
- txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
- txvq->vq_avail_idx += nb_pkts;
- txvq->vq_ring.avail->idx = txvq->vq_avail_idx;
- txvq->packets += nb_pkts;
+ vq->vq_free_cnt -= (uint16_t)(nb_pkts << 1);
+ vq->vq_avail_idx += nb_pkts;
+ vq->vq_ring.avail->idx = vq->vq_avail_idx;
+ txvq->stats.packets += nb_pkts;
if (likely(nb_pkts)) {
- if (unlikely(virtqueue_kick_prepare(txvq)))
- virtqueue_notify(txvq);
+ if (unlikely(virtqueue_kick_prepare(vq)))
+ virtqueue_notify(vq);
}
return nb_pkts;
}
int __attribute__((cold))
-virtio_rxq_vec_setup(struct virtqueue *rxq)
+virtio_rxq_vec_setup(struct virtnet_rx *rxq)
{
uintptr_t p;
struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h
new file mode 100644
index 00000000..7adb55f5
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/vhost.h
@@ -0,0 +1,146 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#include "../virtio_pci.h"
+#include "../virtio_logs.h"
+#include "../virtqueue.h"
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+struct vhost_vring_state {
+ unsigned int index;
+ unsigned int num;
+};
+
+struct vhost_vring_file {
+ unsigned int index;
+ int fd;
+};
+
+struct vhost_vring_addr {
+ unsigned int index;
+ /* Option flags. */
+ unsigned int flags;
+ /* Flag values: */
+ /* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+ /* Start of array of descriptors (virtually contiguous) */
+ uint64_t desc_user_addr;
+ /* Used structure address. Must be 32 bit aligned */
+ uint64_t used_user_addr;
+ /* Available structure address. Must be 16 bit aligned */
+ uint64_t avail_user_addr;
+ /* Logging support. */
+ /* Log writes to used structure, at offset calculated from specified
+ * address. Address must be 32 bit aligned.
+ */
+ uint64_t log_guest_addr;
+};
+
+enum vhost_user_request {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_MAX
+};
+
+struct vhost_memory_region {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size; /* bytes */
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+};
+
+struct vhost_memory {
+ uint32_t nregions;
+ uint32_t padding;
+ struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS];
+};
+
+struct vhost_user_msg {
+ enum vhost_user_request request;
+
+#define VHOST_USER_VERSION_MASK 0x3
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK 0xff
+#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_memory memory;
+ } payload;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed));
+
+#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64)
+#define VHOST_USER_PAYLOAD_SIZE \
+ (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION 0x1
+
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
+
+int vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg);
+int vhost_user_setup(const char *path);
+int vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable);
+
+#endif
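
Taken together, the three exported functions give the caller a small client API for the protocol defined above. A minimal sketch of the expected call order (error handling trimmed; the socket path is illustrative):

	#include <stdint.h>
	#include "vhost.h"

	static int
	probe_backend_features(const char *sock_path, uint64_t *features)
	{
		int vhostfd = vhost_user_setup(sock_path);   /* e.g. "/tmp/vhost-user.sock" */

		if (vhostfd < 0)
			return -1;
		/* claim the backend before any other request */
		if (vhost_user_sock(vhostfd, VHOST_USER_SET_OWNER, NULL) < 0)
			return -1;
		/* GET_FEATURES expects a reply; the result is written into *features */
		return vhost_user_sock(vhostfd, VHOST_USER_GET_FEATURES, features);
	}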
diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
new file mode 100644
index 00000000..a2b0687f
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/vhost_user.c
@@ -0,0 +1,426 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/un.h>
+#include <string.h>
+#include <errno.h>
+
+#include "vhost.h"
+
+static int
+vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num)
+{
+ int r;
+ struct msghdr msgh;
+ struct iovec iov;
+ size_t fd_size = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fd_size)];
+ struct cmsghdr *cmsg;
+
+ memset(&msgh, 0, sizeof(msgh));
+ memset(control, 0, sizeof(control));
+
+ iov.iov_base = (uint8_t *)buf;
+ iov.iov_len = len;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fd_size);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), fds, fd_size);
+
+ do {
+ r = sendmsg(fd, &msgh, 0);
+ } while (r < 0 && errno == EINTR);
+
+ return r;
+}
+
+static int
+vhost_user_read(int fd, struct vhost_user_msg *msg)
+{
+ uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION;
+ int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload;
+
+ ret = recv(fd, (void *)msg, sz_hdr, 0);
+ if (ret < sz_hdr) {
+ PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.",
+ ret, sz_hdr);
+ goto fail;
+ }
+
+ /* validate msg flags */
+ if (msg->flags != (valid_flags)) {
+ PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.",
+ msg->flags, valid_flags);
+ goto fail;
+ }
+
+ sz_payload = msg->size;
+ if (sz_payload) {
+ ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0);
+ if (ret < sz_payload) {
+ PMD_DRV_LOG(ERR,
+ "Failed to recv msg payload: %d instead of %d.",
+ ret, msg->size);
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ return -1;
+}
+
+struct hugepage_file_info {
+ uint64_t addr; /**< virtual addr */
+ size_t size; /**< the file size */
+ char path[PATH_MAX]; /**< path to backing file */
+};
+
+/* Two possible options:
+ * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
+ * array. This is simple but cannot be used in a secondary process, because
+ * the secondary process will close and munmap that file.
+ * 2. Match HUGEFILE_FMT to find hugepage files directly.
+ *
+ * We choose option 2.
+ */
+static int
+get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+{
+ int idx;
+ FILE *f;
+ char buf[BUFSIZ], *tmp, *tail;
+ char *str_underline, *str_start;
+ int huge_index;
+ uint64_t v_start, v_end;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
+ return -1;
+ }
+
+ idx = 0;
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
+ PMD_DRV_LOG(ERR, "Failed to parse address");
+ goto error;
+ }
+
+ tmp = strchr(buf, ' ') + 1; /** skip address */
+ tmp = strchr(tmp, ' ') + 1; /** skip perm */
+ tmp = strchr(tmp, ' ') + 1; /** skip offset */
+ tmp = strchr(tmp, ' ') + 1; /** skip dev */
+ tmp = strchr(tmp, ' ') + 1; /** skip inode */
+ while (*tmp == ' ') /** skip spaces */
+ tmp++;
+ tail = strrchr(tmp, '\n'); /** remove newline if exists */
+ if (tail)
+ *tail = '\0';
+
+ /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
+ * which is defined in eal_filesystem.h
+ */
+ str_underline = strrchr(tmp, '_');
+ if (!str_underline)
+ continue;
+
+ str_start = str_underline - strlen("map");
+ if (str_start < tmp)
+ continue;
+
+ if (sscanf(str_start, "map_%d", &huge_index) != 1)
+ continue;
+
+ if (idx >= max) {
+ PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
+ goto error;
+ }
+ huges[idx].addr = v_start;
+ huges[idx].size = v_end - v_start;
+ strcpy(huges[idx].path, tmp);
+ idx++;
+ }
+
+ fclose(f);
+ return idx;
+
+error:
+ fclose(f);
+ return -1;
+}
+
+static int
+prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
+{
+ int i, num;
+ struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
+ struct vhost_memory_region *mr;
+
+ num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
+ if (num < 0) {
+ PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
+ return -1;
+ }
+
+ for (i = 0; i < num; ++i) {
+ mr = &msg->payload.memory.regions[i];
+ mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
+ mr->userspace_addr = huges[i].addr;
+ mr->memory_size = huges[i].size;
+ mr->mmap_offset = 0;
+ fds[i] = open(huges[i].path, O_RDWR);
+ }
+
+ msg->payload.memory.nregions = num;
+ msg->payload.memory.padding = 0;
+
+ return 0;
+}
+
+static struct vhost_user_msg m;
+
+static const char * const vhost_msg_strings[] = {
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
+ NULL,
+};
+
+int
+vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg)
+{
+ struct vhost_user_msg msg;
+ struct vhost_vring_file *file = 0;
+ int need_reply = 0;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ int fd_num = 0;
+ int i, len;
+
+ RTE_SET_USED(m);
+ RTE_SET_USED(vhost_msg_strings);
+
+ PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
+
+ msg.request = req;
+ msg.flags = VHOST_USER_VERSION;
+ msg.size = 0;
+
+ switch (req) {
+ case VHOST_USER_GET_FEATURES:
+ need_reply = 1;
+ break;
+
+ case VHOST_USER_SET_FEATURES:
+ case VHOST_USER_SET_LOG_BASE:
+ msg.payload.u64 = *((__u64 *)arg);
+ msg.size = sizeof(m.payload.u64);
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ case VHOST_USER_RESET_OWNER:
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ if (prepare_vhost_memory_user(&msg, fds) < 0)
+ return -1;
+ fd_num = msg.payload.memory.nregions;
+ msg.size = sizeof(m.payload.memory.nregions);
+ msg.size += sizeof(m.payload.memory.padding);
+ msg.size += fd_num * sizeof(struct vhost_memory_region);
+ break;
+
+ case VHOST_USER_SET_LOG_FD:
+ fds[fd_num++] = *((int *)arg);
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ case VHOST_USER_SET_VRING_BASE:
+ case VHOST_USER_SET_VRING_ENABLE:
+ memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
+ msg.size = sizeof(m.payload.state);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ memcpy(&msg.payload.state, arg, sizeof(msg.payload.state));
+ msg.size = sizeof(m.payload.state);
+ need_reply = 1;
+ break;
+
+ case VHOST_USER_SET_VRING_ADDR:
+ memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr));
+ msg.size = sizeof(m.payload.addr);
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ case VHOST_USER_SET_VRING_CALL:
+ case VHOST_USER_SET_VRING_ERR:
+ file = arg;
+ msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+ msg.size = sizeof(m.payload.u64);
+ if (file->fd > 0)
+ fds[fd_num++] = file->fd;
+ else
+ msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
+ break;
+
+ default:
+ PMD_DRV_LOG(ERR, "trying to send unhandled msg type");
+ return -1;
+ }
+
+ len = VHOST_USER_HDR_SIZE + msg.size;
+ if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) {
+ PMD_DRV_LOG(ERR, "%s failed: %s",
+ vhost_msg_strings[req], strerror(errno));
+ return -1;
+ }
+
+ if (req == VHOST_USER_SET_MEM_TABLE)
+ for (i = 0; i < fd_num; ++i)
+ close(fds[i]);
+
+ if (need_reply) {
+ if (vhost_user_read(vhostfd, &msg) < 0) {
+ PMD_DRV_LOG(ERR, "Received msg failed: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ if (req != msg.request) {
+ PMD_DRV_LOG(ERR, "Received unexpected msg type");
+ return -1;
+ }
+
+ switch (req) {
+ case VHOST_USER_GET_FEATURES:
+ if (msg.size != sizeof(m.payload.u64)) {
+ PMD_DRV_LOG(ERR, "Received bad msg size");
+ return -1;
+ }
+ *((__u64 *)arg) = msg.payload.u64;
+ break;
+ case VHOST_USER_GET_VRING_BASE:
+ if (msg.size != sizeof(m.payload.state)) {
+ PMD_DRV_LOG(ERR, "Received bad msg size");
+ return -1;
+ }
+ memcpy(arg, &msg.payload.state,
+ sizeof(struct vhost_vring_state));
+ break;
+ default:
+ PMD_DRV_LOG(ERR, "Received unexpected msg type");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Set up the environment to talk to a vhost-user backend.
+ * @param path
+ * - The path to the vhost-user unix socket file.
+ *
+ * @return
+ * - (-1) if setup fails;
+ * - (>=0) on success, the fd of the connected vhost-user socket.
+ */
+int
+vhost_user_setup(const char *path)
+{
+ int fd;
+ int flag;
+ struct sockaddr_un un;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno));
+ return -1;
+ }
+
+ flag = fcntl(fd, F_GETFD);
+ fcntl(fd, F_SETFD, flag | FD_CLOEXEC);
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+ if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+ PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+int
+vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable)
+{
+ int i;
+
+ for (i = 0; i < 2; ++i) {
+ struct vhost_vring_state state = {
+ .index = pair_idx * 2 + i,
+ .num = enable,
+ };
+
+ if (vhost_user_sock(vhostfd,
+ VHOST_USER_SET_VRING_ENABLE, &state))
+ return -1;
+ }
+
+ return 0;
+}
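
get_hugepage_file_info() relies on the hugepage backing files following EAL's HUGEFILE_FMT naming ("%s/%smap_%d"), so a matching /proc/self/maps entry looks roughly like the following (addresses, inode and mount point are illustrative, not taken from this patch):

	7fc400000000-7fc440000000 rw-s 00000000 00:2c 123456    /dev/hugepages/rtemap_0

Everything after the inode column is taken as the file path, the trailing "map_0" is what the sscanf("map_%d") check matches, and the virtual address range becomes both guest_phys_addr and userspace_addr of one region in the SET_MEM_TABLE message.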
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
new file mode 100644
index 00000000..3d12a320
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -0,0 +1,333 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/eventfd.h>
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+#include "../virtio_ethdev.h"
+
+static int
+virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel)
+{
+ int callfd, kickfd;
+ struct vhost_vring_file file;
+ struct vhost_vring_state state;
+ struct vring *vring = &dev->vrings[queue_sel];
+ struct vhost_vring_addr addr = {
+ .index = queue_sel,
+ .desc_user_addr = (uint64_t)(uintptr_t)vring->desc,
+ .avail_user_addr = (uint64_t)(uintptr_t)vring->avail,
+ .used_user_addr = (uint64_t)(uintptr_t)vring->used,
+ .log_guest_addr = 0,
+ .flags = 0, /* disable log */
+ };
+
+ /* We could use an invalid flag, but some backends use kickfd and callfd as
+ * criteria to judge if the device is alive, so in the end we use real eventfds.
+ */
+ callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK);
+ if (callfd < 0) {
+ PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno));
+ return -1;
+ }
+ kickfd = eventfd(0, O_CLOEXEC | O_NONBLOCK);
+ if (kickfd < 0) {
+ close(callfd);
+ PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno));
+ return -1;
+ }
+
+ /* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_CALL comes
+ * first, because vhost depends on this message to allocate the virtqueue
+ * pair.
+ */
+ file.index = queue_sel;
+ file.fd = callfd;
+ vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_CALL, &file);
+ dev->callfds[queue_sel] = callfd;
+
+ state.index = queue_sel;
+ state.num = vring->num;
+ vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_NUM, &state);
+
+ state.num = 0; /* no reservation */
+ vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_BASE, &state);
+
+ vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_ADDR, &addr);
+
+ /* Of all per-virtqueue messages, make sure VHOST_USER_SET_VRING_KICK comes
+ * last, because vhost depends on this message to judge if
+ * virtio is ready.
+ */
+ file.fd = kickfd;
+ vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_KICK, &file);
+ dev->kickfds[queue_sel] = kickfd;
+
+ return 0;
+}
+
+int
+virtio_user_start_device(struct virtio_user_dev *dev)
+{
+ uint64_t features;
+ uint32_t i, queue_sel;
+ int ret;
+
+ /* construct memory region inside each implementation */
+ ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_MEM_TABLE, NULL);
+ if (ret < 0)
+ goto error;
+
+ for (i = 0; i < dev->max_queue_pairs; ++i) {
+ queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX;
+ if (virtio_user_kick_queue(dev, queue_sel) < 0) {
+ PMD_DRV_LOG(INFO, "kick rx vq fails: %u", i);
+ goto error;
+ }
+ }
+ for (i = 0; i < dev->max_queue_pairs; ++i) {
+ queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX;
+ if (virtio_user_kick_queue(dev, queue_sel) < 0) {
+ PMD_DRV_LOG(INFO, "kick tx vq fails: %u", i);
+ goto error;
+ }
+ }
+
+ /* After setting up all virtqueues, we need to send SET_FEATURES so that
+ * these features are applied to each virtqueue on the vhost side. Before
+ * that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if MQ is
+ * enabled, and that VIRTIO_NET_F_MAC is stripped.
+ */
+ features = dev->features;
+ if (dev->max_queue_pairs > 1)
+ features |= VHOST_USER_MQ;
+ features &= ~(1ull << VIRTIO_NET_F_MAC);
+ ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, &features);
+ if (ret < 0)
+ goto error;
+ PMD_DRV_LOG(INFO, "set features: %" PRIx64, features);
+
+ return 0;
+error:
+ /* TODO: free resources here, or have the caller check and free them */
+ return -1;
+}
+
+int virtio_user_stop_device(struct virtio_user_dev *dev)
+{
+ return vhost_user_sock(dev->vhostfd, VHOST_USER_RESET_OWNER, NULL);
+}
+
+static inline void
+parse_mac(struct virtio_user_dev *dev, const char *mac)
+{
+ int i, r;
+ uint32_t tmp[ETHER_ADDR_LEN];
+
+ if (!mac)
+ return;
+
+ r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0],
+ &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
+ if (r == ETHER_ADDR_LEN) {
+ for (i = 0; i < ETHER_ADDR_LEN; ++i)
+ dev->mac_addr[i] = (uint8_t)tmp[i];
+ dev->mac_specified = 1;
+ } else {
+ /* ignore the wrong mac, use random mac */
+ PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac);
+ }
+}
+
+int
+virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
+ int cq, int queue_size, const char *mac)
+{
+ strncpy(dev->path, path, PATH_MAX);
+ dev->max_queue_pairs = queues;
+ dev->queue_pairs = 1; /* mq disabled by default */
+ dev->queue_size = queue_size;
+ dev->mac_specified = 0;
+ parse_mac(dev, mac);
+ dev->vhostfd = -1;
+
+ dev->vhostfd = vhost_user_setup(dev->path);
+ if (dev->vhostfd < 0) {
+ PMD_INIT_LOG(ERR, "backend set up fails");
+ return -1;
+ }
+ if (vhost_user_sock(dev->vhostfd, VHOST_USER_SET_OWNER, NULL) < 0) {
+ PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno));
+ return -1;
+ }
+
+ if (vhost_user_sock(dev->vhostfd, VHOST_USER_GET_FEATURES,
+ &dev->features) < 0) {
+ PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno));
+ return -1;
+ }
+ if (dev->mac_specified)
+ dev->features |= (1ull << VIRTIO_NET_F_MAC);
+
+ if (!cq) {
+ dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ);
+ /* Also disable features that depend on VIRTIO_NET_F_CTRL_VQ */
+ dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX);
+ dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN);
+ dev->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE);
+ dev->features &= ~(1ull << VIRTIO_NET_F_MQ);
+ dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR);
+ } else {
+ /* The vhost-user backend does not need to know about the ctrl-q, so
+ * actually we would need to add this bit into the features ourselves.
+ * However, DPDK vhost-user does advertise this bit in its features, so
+ * we check for it instead of ORing it in for now.
+ */
+ if (!(dev->features & (1ull << VIRTIO_NET_F_CTRL_VQ)))
+ PMD_INIT_LOG(INFO, "vhost does not support ctrl-q");
+ }
+
+ if (dev->max_queue_pairs > 1) {
+ if (!(dev->features & VHOST_USER_MQ)) {
+ PMD_INIT_LOG(ERR, "MQ not supported by the backend");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+void
+virtio_user_dev_uninit(struct virtio_user_dev *dev)
+{
+ uint32_t i;
+
+ for (i = 0; i < dev->max_queue_pairs * 2; ++i) {
+ close(dev->callfds[i]);
+ close(dev->kickfds[i]);
+ }
+
+ close(dev->vhostfd);
+}
+
+static uint8_t
+virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
+{
+ uint16_t i;
+ uint8_t ret = 0;
+
+ if (q_pairs > dev->max_queue_pairs) {
+ PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported",
+ q_pairs, dev->max_queue_pairs);
+ return -1;
+ }
+
+ for (i = 0; i < q_pairs; ++i)
+ ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1);
+ for (i = q_pairs; i < dev->max_queue_pairs; ++i)
+ ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0);
+
+ dev->queue_pairs = q_pairs;
+
+ return ret;
+}
+
+static uint32_t
+virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring,
+ uint16_t idx_hdr)
+{
+ struct virtio_net_ctrl_hdr *hdr;
+ virtio_net_ctrl_ack status = ~0;
+ uint16_t i, idx_data, idx_status;
+ uint32_t n_descs = 0;
+
+ /* locate desc for header, data, and status */
+ idx_data = vring->desc[idx_hdr].next;
+ n_descs++;
+
+ i = idx_data;
+ while (vring->desc[i].flags == VRING_DESC_F_NEXT) {
+ i = vring->desc[i].next;
+ n_descs++;
+ }
+
+ /* locate desc for status */
+ idx_status = i;
+ n_descs++;
+
+ hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr;
+ if (hdr->class == VIRTIO_NET_CTRL_MQ &&
+ hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
+ uint16_t queues;
+
+ queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr;
+ status = virtio_user_handle_mq(dev, queues);
+ }
+
+ /* Update status */
+ *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status;
+
+ return n_descs;
+}
+
+void
+virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
+{
+ uint16_t avail_idx, desc_idx;
+ struct vring_used_elem *uep;
+ uint32_t n_descs;
+ struct vring *vring = &dev->vrings[queue_idx];
+
+ /* Consume the avail ring, using the used ring's idx as the starting point */
+ while (vring->used->idx != vring->avail->idx) {
+ avail_idx = (vring->used->idx) & (vring->num - 1);
+ desc_idx = vring->avail->ring[avail_idx];
+
+ n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
+
+ /* Update used ring */
+ uep = &vring->used->ring[avail_idx];
+ uep->id = avail_idx;
+ uep->len = n_descs;
+
+ vring->used->idx++;
+ }
+}
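
virtio_user_handle_ctrl_msg() walks the standard control-command chain: a header descriptor, one or more data descriptors, and a final writable status descriptor. For the one command it acts on, the chain it expects looks like this (descriptor indices are illustrative):

	desc[idx_hdr]    -> struct virtio_net_ctrl_hdr { class = VIRTIO_NET_CTRL_MQ,
	                                                 cmd   = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET }
	desc[idx_data]   -> uint16_t queue_pairs   /* consumed by virtio_user_handle_mq() */
	desc[idx_status] -> virtio_net_ctrl_ack    /* written back: 0 on success, ~0 otherwise */

Any other class/cmd simply gets ~0 written into its status descriptor.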
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
new file mode 100644
index 00000000..33690b5c
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
@@ -0,0 +1,62 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_USER_DEV_H
+#define _VIRTIO_USER_DEV_H
+
+#include <limits.h>
+#include "../virtio_pci.h"
+#include "../virtio_ring.h"
+
+struct virtio_user_dev {
+ int vhostfd;
+ int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+ int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+ int mac_specified;
+ uint32_t max_queue_pairs;
+ uint32_t queue_pairs;
+ uint32_t queue_size;
+ uint64_t features;
+ uint8_t status;
+ uint8_t mac_addr[ETHER_ADDR_LEN];
+ char path[PATH_MAX];
+ struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1];
+};
+
+int virtio_user_start_device(struct virtio_user_dev *dev);
+int virtio_user_stop_device(struct virtio_user_dev *dev);
+int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
+ int cq, int queue_size, const char *mac);
+void virtio_user_dev_uninit(struct virtio_user_dev *dev);
+void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx);
+#endif
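
The intended lifecycle of a virtio_user_dev is init, start, stop, uninit. A minimal sketch (the socket path and ring size are illustrative; in the real driver the vrings and features are filled in by the ethdev layer before the device is started):

	#include <string.h>
	#include "virtio_user_dev.h"

	static int
	virtio_user_dev_lifecycle(void)
	{
		struct virtio_user_dev dev;
		char path[] = "/tmp/vhost-user.sock";

		memset(&dev, 0, sizeof(dev));
		/* 1 queue pair, no control queue, 256 descriptors per ring, random MAC */
		if (virtio_user_dev_init(&dev, path, 1, 0, 256, NULL) < 0)
			return -1;
		/* ... dev.vrings[] and dev.features are set up by the ethdev layer ... */
		if (virtio_user_start_device(&dev) < 0)  /* SET_MEM_TABLE, per-vq msgs, SET_FEATURES */
			return -1;
		virtio_user_stop_device(&dev);           /* RESET_OWNER */
		virtio_user_dev_uninit(&dev);            /* close call/kick fds and the vhost fd */
		return 0;
	}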
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
new file mode 100644
index 00000000..5ab24711
--- /dev/null
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -0,0 +1,440 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "virtio_ethdev.h"
+#include "virtio_logs.h"
+#include "virtio_pci.h"
+#include "virtqueue.h"
+#include "virtio_rxtx.h"
+#include "virtio_user/virtio_user_dev.h"
+
+#define virtio_user_get_dev(hw) \
+ ((struct virtio_user_dev *)(hw)->virtio_user_dev)
+
+static void
+virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
+ void *dst, int length)
+{
+ int i;
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ if (offset == offsetof(struct virtio_net_config, mac) &&
+ length == ETHER_ADDR_LEN) {
+ for (i = 0; i < ETHER_ADDR_LEN; ++i)
+ ((uint8_t *)dst)[i] = dev->mac_addr[i];
+ return;
+ }
+
+ if (offset == offsetof(struct virtio_net_config, status))
+ *(uint16_t *)dst = dev->status;
+
+ if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs))
+ *(uint16_t *)dst = dev->max_queue_pairs;
+}
+
+static void
+virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset,
+ const void *src, int length)
+{
+ int i;
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ if ((offset == offsetof(struct virtio_net_config, mac)) &&
+ (length == ETHER_ADDR_LEN))
+ for (i = 0; i < ETHER_ADDR_LEN; ++i)
+ dev->mac_addr[i] = ((const uint8_t *)src)[i];
+ else
+ PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n",
+ offset, length);
+}
+
+static void
+virtio_user_set_status(struct virtio_hw *hw, uint8_t status)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK)
+ virtio_user_start_device(dev);
+ dev->status = status;
+}
+
+static void
+virtio_user_reset(struct virtio_hw *hw)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ virtio_user_stop_device(dev);
+}
+
+static uint8_t
+virtio_user_get_status(struct virtio_hw *hw)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ return dev->status;
+}
+
+static uint64_t
+virtio_user_get_features(struct virtio_hw *hw)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ return dev->features;
+}
+
+static void
+virtio_user_set_features(struct virtio_hw *hw, uint64_t features)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ dev->features = features;
+}
+
+static uint8_t
+virtio_user_get_isr(struct virtio_hw *hw __rte_unused)
+{
+ /* When config interrupt happens, driver calls this function to query
+ * what kinds of change happen. Interrupt mode not supported for now.
+ */
+ return 0;
+}
+
+static uint16_t
+virtio_user_set_config_irq(struct virtio_hw *hw __rte_unused,
+ uint16_t vec __rte_unused)
+{
+ return VIRTIO_MSI_NO_VECTOR;
+}
+
+/* This function returns the queue size, i.e. the number of descriptors, of a
+ * specified queue. It differs from VHOST_USER_GET_QUEUE_NUM, which returns the
+ * maximum number of supported queues.
+ */
+static uint16_t
+virtio_user_get_queue_num(struct virtio_hw *hw, uint16_t queue_id __rte_unused)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ /* Currently, each queue has the same queue size */
+ return dev->queue_size;
+}
+
+static int
+virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+ uint16_t queue_idx = vq->vq_queue_index;
+ uint64_t desc_addr, avail_addr, used_addr;
+
+ desc_addr = (uintptr_t)vq->vq_ring_virt_mem;
+ avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+ used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+ ring[vq->vq_nentries]),
+ VIRTIO_PCI_VRING_ALIGN);
+
+ dev->vrings[queue_idx].num = vq->vq_nentries;
+ dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr;
+ dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr;
+ dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr;
+
+ return 0;
+}
+
+static void
+virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+ /* For legacy devices, writing 0 to the VIRTIO_PCI_QUEUE_PFN port makes
+ * QEMU stop the ioeventfds and reset the status of
+ * the device.
+ * For modern devices, the queue desc, avail and used addresses in the PCI
+ * BAR are set to 0, with no further behavior observed in QEMU.
+ *
+ * Here we only care about what information to deliver to vhost-user
+ * or vhost-kernel, so we just close the ioeventfds for now.
+ */
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ close(dev->callfds[vq->vq_queue_index]);
+ close(dev->kickfds[vq->vq_queue_index]);
+}
+
+static void
+virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+ uint64_t buf = 1;
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
+
+ if (hw->cvq && (hw->cvq->vq == vq)) {
+ virtio_user_handle_cq(dev, vq->vq_queue_index);
+ return;
+ }
+
+ if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0)
+ PMD_DRV_LOG(ERR, "failed to kick backend: %s\n",
+ strerror(errno));
+}
+
+static const struct virtio_pci_ops virtio_user_ops = {
+ .read_dev_cfg = virtio_user_read_dev_config,
+ .write_dev_cfg = virtio_user_write_dev_config,
+ .reset = virtio_user_reset,
+ .get_status = virtio_user_get_status,
+ .set_status = virtio_user_set_status,
+ .get_features = virtio_user_get_features,
+ .set_features = virtio_user_set_features,
+ .get_isr = virtio_user_get_isr,
+ .set_config_irq = virtio_user_set_config_irq,
+ .get_queue_num = virtio_user_get_queue_num,
+ .setup_queue = virtio_user_setup_queue,
+ .del_queue = virtio_user_del_queue,
+ .notify_queue = virtio_user_notify_queue,
+};
+
+static const char *valid_args[] = {
+#define VIRTIO_USER_ARG_QUEUES_NUM "queues"
+ VIRTIO_USER_ARG_QUEUES_NUM,
+#define VIRTIO_USER_ARG_CQ_NUM "cq"
+ VIRTIO_USER_ARG_CQ_NUM,
+#define VIRTIO_USER_ARG_MAC "mac"
+ VIRTIO_USER_ARG_MAC,
+#define VIRTIO_USER_ARG_PATH "path"
+ VIRTIO_USER_ARG_PATH,
+#define VIRTIO_USER_ARG_QUEUE_SIZE "queue_size"
+ VIRTIO_USER_ARG_QUEUE_SIZE,
+ NULL
+};
+
+#define VIRTIO_USER_DEF_CQ_EN 0
+#define VIRTIO_USER_DEF_Q_NUM 1
+#define VIRTIO_USER_DEF_Q_SZ 256
+
+static int
+get_string_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ if (!value || !extra_args)
+ return -EINVAL;
+
+ *(char **)extra_args = strdup(value);
+
+ return 0;
+}
+
+static int
+get_integer_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ if (!value || !extra_args)
+ return -EINVAL;
+
+ *(uint64_t *)extra_args = strtoull(value, NULL, 0);
+
+ return 0;
+}
+
+static struct rte_eth_dev *
+virtio_user_eth_dev_alloc(const char *name)
+{
+ struct rte_eth_dev *eth_dev;
+ struct rte_eth_dev_data *data;
+ struct virtio_hw *hw;
+ struct virtio_user_dev *dev;
+
+ eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+ if (!eth_dev) {
+ PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev");
+ return NULL;
+ }
+
+ data = eth_dev->data;
+
+ hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+ if (!hw) {
+ PMD_INIT_LOG(ERR, "malloc virtio_hw failed");
+ rte_eth_dev_release_port(eth_dev);
+ return NULL;
+ }
+
+ dev = rte_zmalloc(NULL, sizeof(*dev), 0);
+ if (!dev) {
+ PMD_INIT_LOG(ERR, "malloc virtio_user_dev failed");
+ rte_eth_dev_release_port(eth_dev);
+ rte_free(hw);
+ return NULL;
+ }
+
+ hw->vtpci_ops = &virtio_user_ops;
+ hw->use_msix = 0;
+ hw->modern = 0;
+ hw->virtio_user_dev = dev;
+ data->dev_private = hw;
+ data->numa_node = SOCKET_ID_ANY;
+ data->kdrv = RTE_KDRV_NONE;
+ data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+ eth_dev->pci_dev = NULL;
+ eth_dev->driver = NULL;
+ return eth_dev;
+}
+
+/* Dev initialization routine. Invoked once for each virtio vdev at
+ * EAL init time, see rte_eal_dev_init().
+ * Returns 0 on success.
+ */
+static int
+virtio_user_pmd_devinit(const char *name, const char *params)
+{
+ struct rte_kvargs *kvlist;
+ struct rte_eth_dev *eth_dev;
+ struct virtio_hw *hw;
+ uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
+ uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
+ uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+ char *path = NULL;
+ char *mac_addr = NULL;
+ int ret = -1;
+
+ if (!params || params[0] == '\0') {
+ PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user",
+ VIRTIO_USER_ARG_PATH);
+ goto end;
+ }
+
+ kvlist = rte_kvargs_parse(params, valid_args);
+ if (!kvlist) {
+ PMD_INIT_LOG(ERR, "error when parsing param");
+ goto end;
+ }
+
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1)
+ rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH,
+ &get_string_arg, &path);
+ else {
+ PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user\n",
+ VIRTIO_USER_ARG_QUEUE_SIZE);
+ goto end;
+ }
+
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1)
+ rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC,
+ &get_string_arg, &mac_addr);
+
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1)
+ rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE,
+ &get_integer_arg, &queue_size);
+
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1)
+ rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM,
+ &get_integer_arg, &queues);
+
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1)
+ rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
+ &get_integer_arg, &cq);
+ else if (queues > 1)
+ cq = 1;
+
+ if (queues > 1 && cq == 0) {
+ PMD_INIT_LOG(ERR, "multi-q requires ctrl-q");
+ goto end;
+ }
+
+ eth_dev = virtio_user_eth_dev_alloc(name);
+ if (!eth_dev) {
+ PMD_INIT_LOG(ERR, "virtio-user fails to alloc device");
+ goto end;
+ }
+
+ hw = eth_dev->data->dev_private;
+ if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
+ queue_size, mac_addr) < 0)
+ goto end;
+
+ /* previously called by rte_eal_pci_probe() for physical dev */
+ if (eth_virtio_dev_init(eth_dev) < 0) {
+ PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
+ goto end;
+ }
+ ret = 0;
+
+end:
+ if (path)
+ free(path);
+ if (mac_addr)
+ free(mac_addr);
+ return ret;
+}
+
+/** Called by rte_eth_dev_detach() */
+static int
+virtio_user_pmd_devuninit(const char *name)
+{
+ struct rte_eth_dev *eth_dev;
+ struct virtio_hw *hw;
+ struct virtio_user_dev *dev;
+
+ if (!name)
+ return -EINVAL;
+
+ PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name);
+ eth_dev = rte_eth_dev_allocated(name);
+ if (!eth_dev)
+ return -ENODEV;
+
+ /* make sure the device is stopped, queues freed */
+ rte_eth_dev_close(eth_dev->data->port_id);
+
+ hw = eth_dev->data->dev_private;
+ dev = hw->virtio_user_dev;
+ virtio_user_dev_uninit(dev);
+
+ rte_free(eth_dev->data->dev_private);
+ rte_free(eth_dev->data);
+ rte_eth_dev_release_port(eth_dev);
+
+ return 0;
+}
+
+static struct rte_driver virtio_user_driver = {
+ .name = "virtio-user",
+ .type = PMD_VDEV,
+ .init = virtio_user_pmd_devinit,
+ .uninit = virtio_user_pmd_devuninit,
+};
+
+PMD_REGISTER_DRIVER(virtio_user_driver);
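
At runtime the driver is instantiated through an EAL --vdev argument whose key/value pairs are the valid_args[] above: only path is mandatory, queues defaults to 1, cq to 0 (forced to 1 when queues > 1) and queue_size to 256. An illustrative argument string for a DPDK application such as testpmd (device index and socket path are examples, not taken from this patch):

	--vdev=virtio-user0,path=/tmp/vhost-user.sock,queues=2,queue_size=256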
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 4e9239e0..455aaafe 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -66,6 +66,14 @@ struct rte_mbuf;
#define VIRTQUEUE_MAX_NAME_SZ 32
+#ifdef RTE_VIRTIO_USER
+#define MBUF_DATA_DMA_ADDR(mb, offset) \
+ ((uint64_t)((uintptr_t)(*(void **)((uintptr_t)mb + offset)) \
+ + (mb)->data_off))
+#else /* RTE_VIRTIO_USER */
+#define MBUF_DATA_DMA_ADDR(mb, offset) rte_mbuf_data_dma_addr(mb)
+#endif /* RTE_VIRTIO_USER */
+
#define VTNET_SQ_RQ_QUEUE_IDX 0
#define VTNET_SQ_TQ_QUEUE_IDX 1
#define VTNET_SQ_CQ_QUEUE_IDX 2
@@ -153,23 +161,30 @@ struct virtio_pmd_ctrl {
uint8_t data[VIRTIO_MAX_CTRL_DATA];
};
+struct vq_desc_extra {
+ void *cookie;
+ uint16_t ndescs;
+};
+
struct virtqueue {
- struct virtio_hw *hw; /**< virtio_hw structure pointer. */
- const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
- const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
- struct rte_mempool *mpool; /**< mempool for mbuf allocation */
- uint16_t queue_id; /**< DPDK queue index. */
- uint8_t port_id; /**< Device port identifier. */
- uint16_t vq_queue_index; /**< PCI queue index */
-
- void *vq_ring_virt_mem; /**< linear address of vring*/
+ struct virtio_hw *hw; /**< virtio_hw structure pointer. */
+ struct vring vq_ring; /**< vring keeping desc, used and avail */
+ /**
+ * Last consumed descriptor in the used table,
+ * trails vq_ring.used->idx.
+ */
+ uint16_t vq_used_cons_idx;
+ uint16_t vq_nentries; /**< vring desc numbers */
+ uint16_t vq_free_cnt; /**< num of desc available */
+ uint16_t vq_avail_idx; /**< sync until needed */
+ uint16_t vq_free_thresh; /**< free threshold */
+
+ void *vq_ring_virt_mem; /**< linear address of vring*/
unsigned int vq_ring_size;
- phys_addr_t vq_ring_mem; /**< physical address of vring */
- struct vring vq_ring; /**< vring keeping desc, used and avail */
- uint16_t vq_free_cnt; /**< num of desc available */
- uint16_t vq_nentries; /**< vring desc numbers */
- uint16_t vq_free_thresh; /**< free threshold */
+ phys_addr_t vq_ring_mem; /**< physical address of vring */
+ /**< use virtual address for virtio-user. */
+
/**
* Head of the free chain in the descriptor table. If
* there are no free descriptors, this will be set to
@@ -177,34 +192,12 @@ struct virtqueue {
*/
uint16_t vq_desc_head_idx;
uint16_t vq_desc_tail_idx;
- /**
- * Last consumed descriptor in the used table,
- * trails vq_ring.used->idx.
- */
- uint16_t vq_used_cons_idx;
- uint16_t vq_avail_idx;
- uint64_t mbuf_initializer; /**< value to init mbufs. */
- phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
-
- struct rte_mbuf **sw_ring; /**< RX software ring. */
- /* dummy mbuf, for wraparound when processing RX ring. */
- struct rte_mbuf fake_mbuf;
-
- /* Statistics */
- uint64_t packets;
- uint64_t bytes;
- uint64_t errors;
- uint64_t multicast;
- uint64_t broadcast;
- /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
- uint64_t size_bins[8];
-
- uint16_t *notify_addr;
-
- struct vq_desc_extra {
- void *cookie;
- uint16_t ndescs;
- } vq_descx[0];
+ uint16_t vq_queue_index; /**< PCI queue index */
+ uint16_t offset; /**< relative offset to obtain addr in mbuf */
+ uint16_t *notify_addr;
+ int configured;
+ struct rte_mbuf **sw_ring; /**< RX software ring. */
+ struct vq_desc_extra vq_descx[0];
};
/* If multiqueue is provided by host, then we support it. */
@@ -302,7 +295,8 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
* descriptor.
*/
avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
- vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+ if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx))
+ vq->vq_ring.avail->ring[avail_idx] = desc_idx;
vq->vq_avail_idx++;
}
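
The new offset field is what lets MBUF_DATA_DMA_ADDR() serve both back ends: queue setup points it at whichever mbuf field holds an address the backend understands, the virtual buf_addr for virtio-user (the vhost-user memory table is built from process virtual addresses) and the physical buf_physaddr for a real PCI device. A sketch of the idea, assuming a caller-supplied flag rather than the driver's own device-type check, which lives in virtio_ethdev.c outside this section:

	#include <stddef.h>
	#include <rte_mbuf.h>
	#include "virtqueue.h"

	/* Sketch: choose which mbuf field MBUF_DATA_DMA_ADDR() will read. */
	static void
	vq_pick_addr_offset(struct virtqueue *vq, int is_virtio_user)
	{
		if (is_virtio_user)
			vq->offset = offsetof(struct rte_mbuf, buf_addr);      /* virtual address */
		else
			vq->offset = offsetof(struct rte_mbuf, buf_physaddr);  /* physical address */
	}

The guarded store added to vq_update_avail_ring() is a separate micro-optimization: when the ring slot already holds desc_idx, as it does on the fixed-layout simple paths, skipping the write presumably avoids dirtying a cache line shared with the host.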