Diffstat (limited to 'drivers/net/virtio')
-rw-r--r-- | drivers/net/virtio/Makefile                       |   7
-rw-r--r-- | drivers/net/virtio/virtio_ethdev.c                | 515
-rw-r--r-- | drivers/net/virtio/virtio_ethdev.h                |   4
-rw-r--r-- | drivers/net/virtio/virtio_logs.h                  |   6
-rw-r--r-- | drivers/net/virtio/virtio_pci.c                   | 120
-rw-r--r-- | drivers/net/virtio/virtio_pci.h                   |   9
-rw-r--r-- | drivers/net/virtio/virtio_ring.h                  |   2
-rw-r--r-- | drivers/net/virtio/virtio_rxtx.c                  | 361
-rw-r--r-- | drivers/net/virtio/virtio_rxtx.h                  |  56
-rw-r--r-- | drivers/net/virtio/virtio_rxtx_simple.c           |  93
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost.h            | 146
-rw-r--r-- | drivers/net/virtio/virtio_user/vhost_user.c       | 426
-rw-r--r-- | drivers/net/virtio/virtio_user/virtio_user_dev.c  | 333
-rw-r--r-- | drivers/net/virtio/virtio_user/virtio_user_dev.h  |  62
-rw-r--r-- | drivers/net/virtio/virtio_user_ethdev.c           | 440
-rw-r--r-- | drivers/net/virtio/virtqueue.h                    |  80
16 files changed, 2183 insertions, 477 deletions
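
A large part of the diff below splits the old all-in-one struct virtqueue into a core ring structure plus per-role wrappers (struct virtnet_rx, virtnet_tx, virtnet_ctl) that are carved out of a single allocation in virtio_dev_queue_setup(), with per-queue statistics moved into struct virtnet_stats on the wrapper. The following sketch is not part of the patch; it uses simplified stand-in struct definitions (not the driver's real headers) to illustrate the resulting memory layout and how the RX path now reaches the ring via rxvq->vq and the counters via rxvq->stats.

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-ins for the driver structures touched by this patch. */
    struct virtnet_stats { uint64_t packets, bytes, errors; };

    struct virtqueue {            /* core ring state only */
        uint16_t vq_nentries;
        uint16_t vq_free_cnt;
        /* ... vq_ring, vq_descx[] follow in the real driver ... */
    };

    struct virtnet_rx {           /* RX wrapper carved out of the same allocation */
        struct virtqueue *vq;
        struct virtnet_stats stats;
        uint16_t queue_id;
    };

    int main(void)
    {
        /* Mirror of the combined allocation in virtio_dev_queue_setup():
         * one zeroed block holds the virtqueue followed by the RX wrapper.
         */
        size_t sz_vq = sizeof(struct virtqueue);  /* + vq_size * desc_extra, cache aligned */
        size_t sz_q  = sz_vq + sizeof(struct virtnet_rx);

        struct virtqueue *vq = calloc(1, sz_q);
        if (vq == NULL)
            return 1;

        struct virtnet_rx *rxvq = (struct virtnet_rx *)((char *)vq + sz_vq);
        rxvq->vq = vq;                            /* wrapper points back at the ring */

        /* Stats now live on the wrapper, e.g. rxvq->stats.packets++ in the RX path. */
        rxvq->stats.packets++;
        printf("rx q%u packets=%llu\n", (unsigned)rxvq->queue_id,
               (unsigned long long)rxvq->stats.packets);

        free(vq);                                 /* one free releases ring and wrapper */
        return 0;
    }

Because the wrapper shares the virtqueue's allocation, the new virtio_dev_rx_queue_release() only releases the virtqueue and its memzone; there is no separate free for the wrapper, matching the "no need to free rxq as vq and rxq are allocated together" comment in the patch.
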
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index ef84f604..3020b688 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -55,9 +55,16 @@ ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c endif +ifeq ($(CONFIG_RTE_VIRTIO_USER),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c +endif + # this lib depends upon: DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_kvargs include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 63a368ac..480daa37 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,8 +59,6 @@ #include "virtqueue.h" #include "virtio_rxtx.h" - -static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); static int virtio_dev_configure(struct rte_eth_dev *dev); static int virtio_dev_start(struct rte_eth_dev *dev); @@ -80,7 +78,10 @@ static void virtio_get_hwaddr(struct virtio_hw *hw); static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); static int virtio_dev_xstats_get(struct rte_eth_dev *dev, - struct rte_eth_xstats *xstats, unsigned n); + struct rte_eth_xstat *xstats, unsigned n); +static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned limit); static void virtio_dev_stats_reset(struct rte_eth_dev *dev); static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); static int virtio_vlan_filter_set(struct rte_eth_dev *dev, @@ -115,40 +116,61 @@ struct rte_virtio_xstats_name_off { }; /* [rt]x_qX_ is prepended to the name string here */ -static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = { - {"good_packets", offsetof(struct virtqueue, packets)}, - {"good_bytes", offsetof(struct virtqueue, bytes)}, - {"errors", offsetof(struct virtqueue, errors)}, - {"multicast_packets", offsetof(struct virtqueue, multicast)}, - {"broadcast_packets", offsetof(struct virtqueue, broadcast)}, - {"undersize_packets", offsetof(struct virtqueue, size_bins[0])}, - {"size_64_packets", offsetof(struct virtqueue, size_bins[1])}, - {"size_65_127_packets", offsetof(struct virtqueue, size_bins[2])}, - {"size_128_255_packets", offsetof(struct virtqueue, size_bins[3])}, - {"size_256_511_packets", offsetof(struct virtqueue, size_bins[4])}, - {"size_512_1023_packets", offsetof(struct virtqueue, size_bins[5])}, - {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])}, - {"size_1518_max_packets", offsetof(struct virtqueue, size_bins[7])}, +static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { + {"good_packets", offsetof(struct virtnet_rx, stats.packets)}, + {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)}, + {"errors", 
offsetof(struct virtnet_rx, stats.errors)}, + {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)}, + {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)}, + {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])}, + {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])}, + {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])}, + {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, + {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, + {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, + {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, + {"size_1518_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, +}; + +/* [rt]x_qX_ is prepended to the name string here */ +static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { + {"good_packets", offsetof(struct virtnet_tx, stats.packets)}, + {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)}, + {"errors", offsetof(struct virtnet_tx, stats.errors)}, + {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)}, + {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)}, + {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])}, + {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])}, + {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])}, + {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, + {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, + {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, + {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, + {"size_1518_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, }; -#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \ - sizeof(rte_virtio_q_stat_strings[0])) +#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ + sizeof(rte_virtio_rxq_stat_strings[0])) +#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ + sizeof(rte_virtio_txq_stat_strings[0])) static int -virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, +virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, int *dlen, int pkt_num) { uint32_t head, i; int k, sum = 0; virtio_net_ctrl_ack status = ~0; struct virtio_pmd_ctrl result; + struct virtqueue *vq; ctrl->status = status; - if (!(vq && vq->hw->cvq)) { + if (!cvq && !cvq->vq) { PMD_INIT_LOG(ERR, "Control queue is not supported."); return -1; } + vq = cvq->vq; head = vq->vq_desc_head_idx; PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " @@ -158,7 +180,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) return -1; - memcpy(vq->virtio_net_hdr_mz->addr, ctrl, + memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl)); /* @@ -168,14 +190,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, * One RX packet for ACK. 
*/ vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem; vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t)*sum; vq->vq_ring.desc[i].len = dlen[k]; @@ -185,7 +207,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, } vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -200,12 +222,12 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, virtqueue_notify(vq); rte_rmb(); - while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) { + while (VIRTQUEUE_NUSED(vq) == 0) { rte_rmb(); usleep(100); } - while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { + while (VIRTQUEUE_NUSED(vq)) { uint32_t idx, desc_idx, used_idx; struct vring_used_elem *uep; @@ -230,7 +252,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", vq->vq_free_cnt, vq->vq_desc_head_idx); - memcpy(&result, vq->virtio_net_hdr_mz->addr, + memcpy(&result, cvq->virtio_net_hdr_mz->addr, sizeof(struct virtio_pmd_ctrl)); return result.status; @@ -261,12 +283,14 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) } void -virtio_dev_queue_release(struct virtqueue *vq) { +virtio_dev_queue_release(struct virtqueue *vq) +{ struct virtio_hw *hw; if (vq) { hw = vq->hw; - hw->vtpci_ops->del_queue(hw, vq); + if (vq->configured) + hw->vtpci_ops->del_queue(hw, vq); rte_free(vq->sw_ring); rte_free(vq); @@ -279,13 +303,21 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, uint16_t nb_desc, unsigned int socket_id, - struct virtqueue **pvq) + void **pvq) { char vq_name[VIRTQUEUE_MAX_NAME_SZ]; - const struct rte_memzone *mz; + char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ]; + const struct rte_memzone *mz = NULL, *hdr_mz = NULL; unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; - struct virtqueue *vq = NULL; + struct virtnet_rx *rxvq = NULL; + struct virtnet_tx *txvq = NULL; + struct virtnet_ctl *cvq = NULL; + struct virtqueue *vq; + const char *queue_names[] = {"rvq", "txq", "cvq"}; + size_t sz_vq, sz_q = 0, sz_hdr_mz = 0; + void *sw_ring = NULL; + int ret; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -305,39 +337,33 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", + dev->data->port_id, queue_names[queue_type], queue_idx); + + sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) + + vq_size * sizeof(struct vq_desc_extra), + RTE_CACHE_LINE_SIZE); if (queue_type == VTNET_RQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", - (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * - sizeof(vq->sw_ring[0]), 
RTE_CACHE_LINE_SIZE, socket_id); + sz_q = sz_vq + sizeof(*rxvq); } else if (queue_type == VTNET_TQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", - dev->data->port_id, queue_idx); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); + sz_q = sz_vq + sizeof(*txvq); + /* + * For each xmit packet, allocate a virtio_net_hdr + * and indirect ring elements + */ + sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region); } else if (queue_type == VTNET_CQ) { - snprintf(vq_name, sizeof(vq_name), "port%d_cvq", - dev->data->port_id); - vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + - vq_size * sizeof(struct vq_desc_extra), - RTE_CACHE_LINE_SIZE); + sz_q = sz_vq + sizeof(*cvq); + /* Allocate a page for control vq command, data and status */ + sz_hdr_mz = PAGE_SIZE; } + + vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id); if (vq == NULL) { - PMD_INIT_LOG(ERR, "Can not allocate virtqueue"); + PMD_INIT_LOG(ERR, "can not allocate vq"); return -ENOMEM; } - if (queue_type == VTNET_RQ && vq->sw_ring == NULL) { - PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); - rte_free(vq); - return -ENOMEM; - } - vq->hw = hw; - vq->port_id = dev->data->port_id; - vq->queue_id = queue_idx; vq->vq_queue_index = vtpci_queue_idx; vq->vq_nentries = vq_size; @@ -350,64 +376,103 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, */ size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); - PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size); + PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", + size, vq->vq_ring_size); - mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, - socket_id, 0, VIRTIO_PCI_VRING_ALIGN); + mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id, + 0, VIRTIO_PCI_VRING_ALIGN); if (mz == NULL) { if (rte_errno == EEXIST) mz = rte_memzone_lookup(vq_name); if (mz == NULL) { - rte_free(vq); - return -ENOMEM; + ret = -ENOMEM; + goto fail_q_alloc; } } - /* - * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, - * and only accepts 32 bit page frame number. - * Check if the allocated physical memory exceeds 16TB. 
- */ - if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { - PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); - rte_free(vq); - return -ENOMEM; - } - memset(mz->addr, 0, sizeof(mz->len)); - vq->mz = mz; + vq->vq_ring_mem = mz->phys_addr; vq->vq_ring_virt_mem = mz->addr; - PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64, (uint64_t)mz->phys_addr); - PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)(uintptr_t)mz->addr); - vq->virtio_net_hdr_mz = NULL; - vq->virtio_net_hdr_mem = 0; - - if (queue_type == VTNET_TQ) { - const struct rte_memzone *hdr_mz; - struct virtio_tx_region *txr; - unsigned int i; - - /* - * For each xmit packet, allocate a virtio_net_hdr - * and indirect ring elements - */ - snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", - dev->data->port_id, queue_idx); - hdr_mz = rte_memzone_reserve_aligned(vq_name, - vq_size * sizeof(*txr), + PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, + (uint64_t)mz->phys_addr); + PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64, + (uint64_t)(uintptr_t)mz->addr); + + if (sz_hdr_mz) { + snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr", + dev->data->port_id, queue_names[queue_type], + queue_idx); + hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz, socket_id, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { if (rte_errno == EEXIST) - hdr_mz = rte_memzone_lookup(vq_name); + hdr_mz = rte_memzone_lookup(vq_hdr_name); if (hdr_mz == NULL) { - rte_free(vq); - return -ENOMEM; + ret = -ENOMEM; + goto fail_q_alloc; } } - vq->virtio_net_hdr_mz = hdr_mz; - vq->virtio_net_hdr_mem = hdr_mz->phys_addr; + } + + if (queue_type == VTNET_RQ) { + size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]); + + sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, + RTE_CACHE_LINE_SIZE, socket_id); + if (!sw_ring) { + PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); + ret = -ENOMEM; + goto fail_q_alloc; + } + + vq->sw_ring = sw_ring; + rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq); + rxvq->vq = vq; + rxvq->port_id = dev->data->port_id; + rxvq->queue_id = queue_idx; + rxvq->mz = mz; + *pvq = rxvq; + } else if (queue_type == VTNET_TQ) { + txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); + txvq->vq = vq; + txvq->port_id = dev->data->port_id; + txvq->queue_id = queue_idx; + txvq->mz = mz; + txvq->virtio_net_hdr_mz = hdr_mz; + txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + + *pvq = txvq; + } else if (queue_type == VTNET_CQ) { + cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq->vq = vq; + cvq->mz = mz; + cvq->virtio_net_hdr_mz = hdr_mz; + cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + *pvq = cvq; + } + + /* For virtio-user case (that is when dev->pci_dev is NULL), we use + * virtual address. And we need properly set _offset_, please see + * MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
+ */ + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (uintptr_t)mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (queue_type == VTNET_TQ) + txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + else if (queue_type == VTNET_CQ) + cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) { + struct virtio_tx_region *txr; + unsigned int i; txr = hdr_mz->addr; memset(txr, 0, vq_size * sizeof(*txr)); @@ -417,57 +482,50 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); /* first indirect descriptor is always the tx header */ - start_dp->addr = vq->virtio_net_hdr_mem + start_dp->addr = txvq->virtio_net_hdr_mem + i * sizeof(*txr) + offsetof(struct virtio_tx_region, tx_hdr); - start_dp->len = vq->hw->vtnet_hdr_size; + start_dp->len = hw->vtnet_hdr_size; start_dp->flags = VRING_DESC_F_NEXT; } - - } else if (queue_type == VTNET_CQ) { - /* Allocate a page for control vq command, data and status */ - snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", - dev->data->port_id); - vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, - PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE); - if (vq->virtio_net_hdr_mz == NULL) { - if (rte_errno == EEXIST) - vq->virtio_net_hdr_mz = - rte_memzone_lookup(vq_name); - if (vq->virtio_net_hdr_mz == NULL) { - rte_free(vq); - return -ENOMEM; - } - } - vq->virtio_net_hdr_mem = - vq->virtio_net_hdr_mz->phys_addr; - memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } - hw->vtpci_ops->setup_queue(hw, vq); + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } - *pvq = vq; + vq->configured = 1; return 0; + +fail_q_alloc: + rte_free(sw_ring); + rte_memzone_free(hdr_mz); + rte_memzone_free(mz); + rte_free(vq); + + return ret; } static int virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, uint32_t socket_id) { - struct virtqueue *vq; + struct virtnet_ctl *cvq; int ret; struct virtio_hw *hw = dev->data->dev_private; PMD_INIT_FUNC_TRACE(); ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, - vtpci_queue_idx, 0, socket_id, &vq); + vtpci_queue_idx, 0, socket_id, (void **)&cvq); if (ret < 0) { PMD_INIT_LOG(ERR, "control vq initialization failed"); return ret; } - hw->cvq = vq; + hw->cvq = cvq; return 0; } @@ -491,7 +549,6 @@ static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); @@ -499,7 +556,7 @@ virtio_dev_close(struct rte_eth_dev *dev) virtio_dev_stop(dev); /* reset the NIC */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); vtpci_reset(hw); virtio_dev_free_mbufs(dev); @@ -614,6 +671,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .dev_infos_get = virtio_dev_info_get, .stats_get = virtio_dev_stats_get, .xstats_get = virtio_dev_xstats_get, + .xstats_get_names = virtio_dev_xstats_get_names, .stats_reset = virtio_dev_stats_reset, .xstats_reset = virtio_dev_stats_reset, .link_update = virtio_dev_link_update, @@ -675,83 +733,121 @@ virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) unsigned i; for (i = 0; i < dev->data->nb_tx_queues; i++) { - const struct virtqueue *txvq = dev->data->tx_queues[i]; + const struct 
virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; - stats->opackets += txvq->packets; - stats->obytes += txvq->bytes; - stats->oerrors += txvq->errors; + stats->opackets += txvq->stats.packets; + stats->obytes += txvq->stats.bytes; + stats->oerrors += txvq->stats.errors; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { - stats->q_opackets[i] = txvq->packets; - stats->q_obytes[i] = txvq->bytes; + stats->q_opackets[i] = txvq->stats.packets; + stats->q_obytes[i] = txvq->stats.bytes; } } for (i = 0; i < dev->data->nb_rx_queues; i++) { - const struct virtqueue *rxvq = dev->data->rx_queues[i]; + const struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; - stats->ipackets += rxvq->packets; - stats->ibytes += rxvq->bytes; - stats->ierrors += rxvq->errors; + stats->ipackets += rxvq->stats.packets; + stats->ibytes += rxvq->stats.bytes; + stats->ierrors += rxvq->stats.errors; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { - stats->q_ipackets[i] = rxvq->packets; - stats->q_ibytes[i] = rxvq->bytes; + stats->q_ipackets[i] = rxvq->stats.packets; + stats->q_ibytes[i] = rxvq->stats.bytes; } } stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; } +static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned limit) +{ + unsigned i; + unsigned count = 0; + unsigned t; + + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; + + if (xstats_names != NULL) { + /* Note: limit checked in rte_eth_xstats_names() */ + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtqueue *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "rx_q%u_%s", i, + rte_virtio_rxq_stat_strings[t].name); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtqueue *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "tx_q%u_%s", i, + rte_virtio_txq_stat_strings[t].name); + count++; + } + } + return count; + } + return nstats; +} + static int -virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats, +virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, unsigned n) { unsigned i; unsigned count = 0; - unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_Q_XSTATS + - dev->data->nb_rx_queues * VIRTIO_NB_Q_XSTATS; + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; if (n < nstats) return nstats; for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtqueue *rxvq = dev->data->rx_queues[i]; + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; unsigned t; - for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { - snprintf(xstats[count].name, sizeof(xstats[count].name), - "rx_q%u_%s", i, - rte_virtio_q_stat_strings[t].name); + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)rxvq) + - rte_virtio_q_stat_strings[t].offset); + rte_virtio_rxq_stat_strings[t].offset); count++; } } for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtqueue *txvq = dev->data->tx_queues[i]; + struct virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; unsigned t; - for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { - 
snprintf(xstats[count].name, sizeof(xstats[count].name), - "tx_q%u_%s", i, - rte_virtio_q_stat_strings[t].name); + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)txvq) + - rte_virtio_q_stat_strings[t].offset); + rte_virtio_txq_stat_strings[t].offset); count++; } } @@ -771,29 +867,31 @@ virtio_dev_stats_reset(struct rte_eth_dev *dev) unsigned int i; for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtqueue *txvq = dev->data->tx_queues[i]; + struct virtnet_tx *txvq = dev->data->tx_queues[i]; if (txvq == NULL) continue; - txvq->packets = 0; - txvq->bytes = 0; - txvq->errors = 0; - txvq->multicast = 0; - txvq->broadcast = 0; - memset(txvq->size_bins, 0, sizeof(txvq->size_bins[0]) * 8); + txvq->stats.packets = 0; + txvq->stats.bytes = 0; + txvq->stats.errors = 0; + txvq->stats.multicast = 0; + txvq->stats.broadcast = 0; + memset(txvq->stats.size_bins, 0, + sizeof(txvq->stats.size_bins[0]) * 8); } for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtqueue *rxvq = dev->data->rx_queues[i]; + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; if (rxvq == NULL) continue; - rxvq->packets = 0; - rxvq->bytes = 0; - rxvq->errors = 0; - rxvq->multicast = 0; - rxvq->broadcast = 0; - memset(rxvq->size_bins, 0, sizeof(rxvq->size_bins[0]) * 8); + rxvq->stats.packets = 0; + rxvq->stats.bytes = 0; + rxvq->stats.errors = 0; + rxvq->stats.multicast = 0; + rxvq->stats.broadcast = 0; + memset(rxvq->stats.size_bins, 0, + sizeof(rxvq->stats.size_bins[0]) * 8); } } @@ -827,7 +925,7 @@ virtio_mac_table_set(struct virtio_hw *hw, int err, len[2]; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { - PMD_DRV_LOG(INFO, "host does not support mac table\n"); + PMD_DRV_LOG(INFO, "host does not support mac table"); return; } @@ -1027,16 +1125,17 @@ rx_func_get(struct rte_eth_dev *eth_dev) * This function is based on probe() function in virtio_pci.c * It returns 0 on success. 
*/ -static int +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; struct virtio_net_config *config; struct virtio_net_config local_config; struct rte_pci_device *pci_dev; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; int ret; - RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)); + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); eth_dev->dev_ops = &virtio_eth_dev_ops; eth_dev->tx_pkt_burst = &virtio_xmit_pkts; @@ -1057,9 +1156,11 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) pci_dev = eth_dev->pci_dev; - ret = vtpci_init(pci_dev, hw); - if (ret) - return ret; + if (pci_dev) { + ret = vtpci_init(pci_dev, hw, &dev_flags); + if (ret) + return ret; + } /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1074,9 +1175,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + dev_flags &= ~RTE_ETH_DEV_INTR_LSC; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = dev_flags; rx_func_get(eth_dev); @@ -1150,12 +1252,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", hw->max_rx_queues, hw->max_tx_queues); - PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + if (pci_dev) + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); /* Setup interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_register(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1184,13 +1287,14 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->tx_pkt_burst = NULL; eth_dev->rx_pkt_burst = NULL; - virtio_dev_queue_release(hw->cvq); + if (hw->cvq) + virtio_dev_queue_release(hw->cvq->vq); rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) rte_intr_callback_unregister(&pci_dev->intr_handle, virtio_interrupt_handler, eth_dev); @@ -1240,7 +1344,6 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; PMD_INIT_LOG(DEBUG, "configure"); @@ -1258,7 +1361,7 @@ virtio_dev_configure(struct rte_eth_dev *dev) return -ENOTSUP; } - if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return -EBUSY; @@ -1273,11 +1376,12 @@ virtio_dev_start(struct rte_eth_dev *dev) { uint16_t nb_queues, i; struct virtio_hw *hw = dev->data->dev_private; - struct rte_pci_device *pci_dev = dev->pci_dev; + struct virtnet_rx *rxvq; + struct virtnet_tx *txvq __rte_unused; /* check if lsc interrupt feature is enabled */ if (dev->data->dev_conf.intr_conf.lsc) { - if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) { + if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { PMD_DRV_LOG(ERR, "link status not supported by host"); return -ENOTSUP; } @@ -1313,16 +1417,22 @@ virtio_dev_start(struct rte_eth_dev *dev) 
PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); - for (i = 0; i < nb_queues; i++) - virtqueue_notify(dev->data->rx_queues[i]); + for (i = 0; i < nb_queues; i++) { + rxvq = dev->data->rx_queues[i]; + virtqueue_notify(rxvq->vq); + } PMD_INIT_LOG(DEBUG, "Notified backend at initialization"); - for (i = 0; i < dev->data->nb_rx_queues; i++) - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxvq = dev->data->rx_queues[i]; + VIRTQUEUE_DUMP(rxvq->vq); + } - for (i = 0; i < dev->data->nb_tx_queues; i++) - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txvq = dev->data->tx_queues[i]; + VIRTQUEUE_DUMP(txvq->vq); + } return 0; } @@ -1333,14 +1443,14 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) int i, mbuf_num = 0; for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + PMD_INIT_LOG(DEBUG, "Before freeing rxq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + VIRTQUEUE_DUMP(rxvq->vq); - PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", - i, dev->data->rx_queues[i]); - while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( - dev->data->rx_queues[i])) != NULL) { + PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq); + while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) { rte_pktmbuf_free(buf); mbuf_num++; } @@ -1348,27 +1458,27 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); PMD_INIT_LOG(DEBUG, "After freeing rxq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + VIRTQUEUE_DUMP(rxvq->vq); } for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + PMD_INIT_LOG(DEBUG, "Before freeing txq[%d] used and unused bufs", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(txvq->vq); mbuf_num = 0; - while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( - dev->data->tx_queues[i])) != NULL) { + while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) { rte_pktmbuf_free(buf); - mbuf_num++; } PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); PMD_INIT_LOG(DEBUG, "After freeing txq[%d] used and unused buf", i); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(txvq->vq); } } @@ -1431,7 +1541,10 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct virtio_hw *hw = dev->data->dev_private; - dev_info->driver_name = dev->driver->pci_drv.name; + if (dev->pci_dev) + dev_info->driver_name = dev->driver->pci_drv.name; + else + dev_info->driver_name = "virtio-user PMD"; dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index 66423a07..2ecec6eb 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ b/drivers/net/virtio/virtio_ethdev.h @@ -81,7 +81,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, uint16_t nb_desc, unsigned int socket_id, - struct virtqueue **pvq); + void **pvq); void virtio_dev_queue_release(struct virtqueue *vq); @@ -113,6 +113,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int 
eth_virtio_dev_init(struct rte_eth_dev *eth_dev); + /* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us * frames larger than 1514 bytes. We do not yet support software LRO diff --git a/drivers/net/virtio/virtio_logs.h b/drivers/net/virtio/virtio_logs.h index d6c33f7b..90a79eaa 100644 --- a/drivers/net/virtio/virtio_logs.h +++ b/drivers/net/virtio/virtio_logs.h @@ -47,14 +47,14 @@ #ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX #define PMD_RX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s() rx: " fmt "\n", __func__, ## args) #else #define PMD_RX_LOG(level, fmt, args...) do { } while(0) #endif #ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX #define PMD_TX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s() tx: " fmt "\n", __func__, ## args) #else #define PMD_TX_LOG(level, fmt, args...) do { } while(0) #endif @@ -62,7 +62,7 @@ #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER #define PMD_DRV_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) #else #define PMD_DRV_LOG(level, fmt, args...) do { } while(0) #endif diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index c007959f..f1a7ca7e 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -55,20 +55,103 @@ */ #define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + +/* + * Since we are in legacy mode: + * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf + * + * "Note that this is possible because while the virtio header is PCI (i.e. + * little) endian, the device-specific region is encoded in the native endian of + * the guest (where such distinction is applicable)." + * + * For powerpc which supports both, qemu supposes that cpu is big endian and + * enforces this for the virtio-net stuff. 
+ */ static void legacy_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) { +#ifdef RTE_ARCH_PPC_64 + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); + } else if (length >= 2) { + size = 2; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); + } else { + size = 1; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + dst = (char *)dst + size; + offset += size; + length -= size; + } +#else rte_eal_pci_ioport_read(&hw->io, dst, length, VIRTIO_PCI_CONFIG(hw) + offset); +#endif } static void legacy_write_dev_config(struct virtio_hw *hw, size_t offset, const void *src, int length) { +#ifdef RTE_ARCH_PPC_64 + union { + uint32_t u32; + uint16_t u16; + } tmp; + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else if (length >= 2) { + size = 2; + tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else { + size = 1; + rte_eal_pci_ioport_write(&hw->io, src, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + src = (const char *)src + size; + offset += size; + length -= size; + } +#else rte_eal_pci_ioport_write(&hw->io, src, length, VIRTIO_PCI_CONFIG(hw) + offset); +#endif } static uint64_t @@ -143,15 +226,20 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return dst; } -static void +static int legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src; + if (!check_vq_phys_addr_ok(vq)) + return -1; + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); - src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; } static void @@ -179,7 +267,7 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc) char dirname[PATH_MAX]; snprintf(dirname, sizeof(dirname), - SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs", + "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function); d = opendir(dirname); @@ -199,15 +287,15 @@ legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused) static int legacy_virtio_resource_init(struct rte_pci_device *pci_dev, - struct virtio_hw *hw) + struct virtio_hw *hw, uint32_t *dev_flags) { if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) return -1; if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) - pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + *dev_flags |= RTE_ETH_DEV_INTR_LSC; else - pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + *dev_flags &= ~RTE_ETH_DEV_INTR_LSC; return 0; } @@ -367,13 +455,16 @@ modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) return io_read16(&hw->common_cfg->queue_size); } -static void +static int modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint64_t desc_addr, avail_addr, used_addr; uint16_t notify_off; - desc_addr = vq->mz->phys_addr; + if (!check_vq_phys_addr_ok(vq)) + return -1; + + desc_addr = vq->vq_ring_mem; avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); used_addr = 
RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, ring[vq->vq_nentries]), @@ -400,6 +491,8 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", vq->notify_addr, notify_off); + + return 0; } static void @@ -626,11 +719,13 @@ next: * Return -1: * if there is error mapping with VFIO/UIO. * if port map error when driver type is KDRV_NONE. + * if whitelisted but driver type is KDRV_UNKNOWN. * Return 1 if kernel driver is managing the device. * Return 0 on success. */ int -vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) +vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, + uint32_t *dev_flags) { hw->dev = dev; @@ -643,14 +738,15 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) PMD_INIT_LOG(INFO, "modern virtio pci detected."); hw->vtpci_ops = &modern_ops; hw->modern = 1; - dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + *dev_flags |= RTE_ETH_DEV_INTR_LSC; return 0; } PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); - if (legacy_virtio_resource_init(dev, hw) < 0) { + if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { if (dev->kdrv == RTE_KDRV_UNKNOWN && - dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) { + (!dev->devargs || + dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) { PMD_INIT_LOG(INFO, "skip kernel managed virtio device."); return 1; diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index b69785ea..dd7693fe 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -40,6 +40,7 @@ #include <rte_ethdev.h> struct virtqueue; +struct virtnet_ctl; /* VirtIO PCI vendor/device ID. */ #define VIRTIO_PCI_VENDORID 0x1AF4 @@ -234,7 +235,7 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); - void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); }; @@ -242,7 +243,7 @@ struct virtio_pci_ops { struct virtio_net_config; struct virtio_hw { - struct virtqueue *cvq; + struct virtnet_ctl *cvq; struct rte_pci_ioport io; uint64_t guest_features; uint32_t max_tx_queues; @@ -260,6 +261,7 @@ struct virtio_hw { struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; const struct virtio_pci_ops *vtpci_ops; + void *virtio_user_dev; }; /* @@ -293,7 +295,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit) /* * Function declaration from virtio_pci.c */ -int vtpci_init(struct rte_pci_device *, struct virtio_hw *); +int vtpci_init(struct rte_pci_device *, struct virtio_hw *, + uint32_t *dev_flags); void vtpci_reset(struct virtio_hw *); void vtpci_reinit_complete(struct virtio_hw *); diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h index 447760a8..fcecc161 100644 --- a/drivers/net/virtio/virtio_ring.h +++ b/drivers/net/virtio/virtio_ring.h @@ -79,7 +79,7 @@ struct vring_used_elem { struct vring_used { uint16_t flags; - uint16_t idx; + volatile uint16_t idx; struct vring_used_elem ring[0]; }; diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index ef21d8e3..a27208e3 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -193,8 +193,7 @@ 
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) start_dp = vq->vq_ring.desc; start_dp[idx].addr = - (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - - hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(cookie, vq->offset) - hw->vtnet_hdr_size; start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_WRITE; @@ -209,23 +208,24 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) } static inline void -virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, +virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, uint16_t needed, int use_indirect, int can_push) { struct vq_desc_extra *dxp; + struct virtqueue *vq = txvq->vq; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; - uint16_t head_size = txvq->hw->vtnet_hdr_size; + uint16_t head_size = vq->hw->vtnet_hdr_size; unsigned long offs; - head_idx = txvq->vq_desc_head_idx; + head_idx = vq->vq_desc_head_idx; idx = head_idx; - dxp = &txvq->vq_descx[idx]; + dxp = &vq->vq_descx[idx]; dxp->cookie = (void *)cookie; dxp->ndescs = needed; - start_dp = txvq->vq_ring.desc; + start_dp = vq->vq_ring.desc; if (can_push) { /* put on zero'd transmit header (no offloads) */ @@ -259,46 +259,32 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, + offsetof(struct virtio_tx_region, tx_hdr); start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; - start_dp[idx].len = txvq->hw->vtnet_hdr_size; + start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; idx = start_dp[idx].next; } do { - start_dp[idx].addr = rte_mbuf_data_dma_addr(cookie); + start_dp[idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset); start_dp[idx].len = cookie->data_len; start_dp[idx].flags = cookie->next ? 
VRING_DESC_F_NEXT : 0; idx = start_dp[idx].next; } while ((cookie = cookie->next) != NULL); - start_dp[idx].flags &= ~VRING_DESC_F_NEXT; - if (use_indirect) - idx = txvq->vq_ring.desc[head_idx].next; - - txvq->vq_desc_head_idx = idx; - if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) - txvq->vq_desc_tail_idx = idx; - txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); - vq_update_avail_ring(txvq, head_idx); -} + idx = vq->vq_ring.desc[head_idx].next; -static inline struct rte_mbuf * -rte_rxmbuf_alloc(struct rte_mempool *mp) -{ - struct rte_mbuf *m; - - m = __rte_mbuf_raw_alloc(mp); - __rte_mbuf_sanity_check_raw(m, 0); - - return m; + vq->vq_desc_head_idx = idx; + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + vq->vq_desc_tail_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_update_avail_ring(vq, head_idx); } static void -virtio_dev_vring_start(struct virtqueue *vq, int queue_type) +virtio_dev_vring_start(struct virtqueue *vq) { - struct rte_mbuf *m; - int i, nbufs, error, size = vq->vq_nentries; + int size = vq->vq_nentries; struct vring *vr = &vq->vq_ring; uint8_t *ring_mem = vq->vq_ring_virt_mem; @@ -322,30 +308,70 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type) * Disable device(host) interrupting guest */ virtqueue_disable_intr(vq); +} + +void +virtio_dev_cq_start(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (hw->cvq && hw->cvq->vq) { + virtio_dev_vring_start(hw->cvq->vq); + VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); + } +} - /* Only rx virtqueue needs mbufs to be allocated at initialization */ - if (queue_type == VTNET_RQ) { - if (vq->mpool == NULL) +void +virtio_dev_rxtx_start(struct rte_eth_dev *dev) +{ + /* + * Start receive and transmit vrings + * - Setup vring structure for all queues + * - Initialize descriptor for the rx vring + * - Allocate blank mbufs for the each rx descriptor + * + */ + uint16_t i; + uint16_t desc_idx; + + PMD_INIT_FUNC_TRACE(); + + /* Start rx vring. 
*/ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + struct virtqueue *vq = rxvq->vq; + int error, nbufs; + struct rte_mbuf *m; + + virtio_dev_vring_start(vq); + if (rxvq->mpool == NULL) { rte_exit(EXIT_FAILURE, - "Cannot allocate initial mbufs for rx virtqueue"); + "Cannot allocate mbufs for rx virtqueue"); + } /* Allocate blank mbufs for the each rx descriptor */ nbufs = 0; error = ENOSPC; #ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) - for (i = 0; i < vq->vq_nentries; i++) { - vq->vq_ring.avail->ring[i] = i; - vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + if (use_simple_rxtx) { + for (desc_idx = 0; desc_idx < vq->vq_nentries; + desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + vq->vq_ring.desc[desc_idx].flags = + VRING_DESC_F_WRITE; } + } #endif - memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf)); - for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++) - vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf; + memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; + desc_idx++) { + vq->sw_ring[vq->vq_nentries + desc_idx] = + &rxvq->fake_mbuf; + } while (!virtqueue_full(vq)) { - m = rte_rxmbuf_alloc(vq->mpool); + m = rte_mbuf_raw_alloc(rxvq->mpool); if (m == NULL) break; @@ -368,64 +394,40 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type) vq_update_avail_idx(vq); PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); - } else if (queue_type == VTNET_TQ) { + + VIRTQUEUE_DUMP(vq); + } + + /* Start tx vring. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + struct virtqueue *vq = txvq->vq; + + virtio_dev_vring_start(vq); #ifdef RTE_MACHINE_CPUFLAG_SSSE3 if (use_simple_rxtx) { - int mid_idx = vq->vq_nentries >> 1; - for (i = 0; i < mid_idx; i++) { - vq->vq_ring.avail->ring[i] = i + mid_idx; - vq->vq_ring.desc[i + mid_idx].next = i; - vq->vq_ring.desc[i + mid_idx].addr = - vq->virtio_net_hdr_mem + + uint16_t mid_idx = vq->vq_nentries >> 1; + + for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = + desc_idx + mid_idx; + vq->vq_ring.desc[desc_idx + mid_idx].next = + desc_idx; + vq->vq_ring.desc[desc_idx + mid_idx].addr = + txvq->virtio_net_hdr_mem + offsetof(struct virtio_tx_region, tx_hdr); - vq->vq_ring.desc[i + mid_idx].len = + vq->vq_ring.desc[desc_idx + mid_idx].len = vq->hw->vtnet_hdr_size; - vq->vq_ring.desc[i + mid_idx].flags = + vq->vq_ring.desc[desc_idx + mid_idx].flags = VRING_DESC_F_NEXT; - vq->vq_ring.desc[i].flags = 0; + vq->vq_ring.desc[desc_idx].flags = 0; } - for (i = mid_idx; i < vq->vq_nentries; i++) - vq->vq_ring.avail->ring[i] = i; + for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; + desc_idx++) + vq->vq_ring.avail->ring[desc_idx] = desc_idx; } #endif - } -} - -void -virtio_dev_cq_start(struct rte_eth_dev *dev) -{ - struct virtio_hw *hw = dev->data->dev_private; - - if (hw->cvq) { - virtio_dev_vring_start(hw->cvq, VTNET_CQ); - VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq); - } -} - -void -virtio_dev_rxtx_start(struct rte_eth_dev *dev) -{ - /* - * Start receive and transmit vrings - * - Setup vring structure for all queues - * - Initialize descriptor for the rx vring - * - Allocate blank mbufs for the each rx descriptor - * - */ - int i; - - PMD_INIT_FUNC_TRACE(); - - /* Start rx vring. 
*/ - for (i = 0; i < dev->data->nb_rx_queues; i++) { - virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); - } - - /* Start tx vring. */ - for (i = 0; i < dev->data->nb_tx_queues; i++) { - virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ); - VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + VIRTQUEUE_DUMP(vq); } } @@ -438,24 +440,24 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, struct rte_mempool *mp) { uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; - struct virtqueue *vq; + struct virtnet_rx *rxvq; int ret; PMD_INIT_FUNC_TRACE(); ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, &vq); + nb_desc, socket_id, (void **)&rxvq); if (ret < 0) { PMD_INIT_LOG(ERR, "rvq initialization failed"); return ret; } /* Create mempool for rx mbuf allocation */ - vq->mpool = mp; + rxvq->mpool = mp; - dev->data->rx_queues[queue_idx] = vq; + dev->data->rx_queues[queue_idx] = rxvq; #ifdef RTE_MACHINE_CPUFLAG_SSSE3 - virtio_rxq_vec_setup(vq); + virtio_rxq_vec_setup(rxvq); #endif return 0; @@ -464,7 +466,16 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, void virtio_dev_rx_queue_release(void *rxq) { - virtio_dev_queue_release(rxq); + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq = rxvq->vq; + /* rxvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. + */ + const struct rte_memzone *mz = rxvq->mz; + + /* no need to free rxq as vq and rxq are allocated together */ + virtio_dev_queue_release(vq); + rte_memzone_free(mz); } /* @@ -486,6 +497,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, #ifdef RTE_MACHINE_CPUFLAG_SSSE3 struct virtio_hw *hw = dev->data->dev_private; #endif + struct virtnet_tx *txvq; struct virtqueue *vq; uint16_t tx_free_thresh; int ret; @@ -510,11 +522,12 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, #endif ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, &vq); + nb_desc, socket_id, (void **)&txvq); if (ret < 0) { - PMD_INIT_LOG(ERR, "rvq initialization failed"); + PMD_INIT_LOG(ERR, "tvq initialization failed"); return ret; } + vq = txvq->vq; tx_free_thresh = tx_conf->tx_free_thresh; if (tx_free_thresh == 0) @@ -532,14 +545,24 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_thresh = tx_free_thresh; - dev->data->tx_queues[queue_idx] = vq; + dev->data->tx_queues[queue_idx] = txvq; return 0; } void virtio_dev_tx_queue_release(void *txq) { - virtio_dev_queue_release(txq); + struct virtnet_tx *txvq = txq; + struct virtqueue *vq = txvq->vq; + /* txvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. 
+ */ + const struct rte_memzone *hdr_mz = txvq->virtio_net_hdr_mz; + const struct rte_memzone *mz = txvq->mz; + + virtio_dev_queue_release(vq); + rte_memzone_free(mz); + rte_memzone_free(hdr_mz); } static void @@ -558,34 +581,34 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) } static void -virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf) +virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) { uint32_t s = mbuf->pkt_len; struct ether_addr *ea; if (s == 64) { - vq->size_bins[1]++; + stats->size_bins[1]++; } else if (s > 64 && s < 1024) { uint32_t bin; /* count zeros, and offset into correct bin */ bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; - vq->size_bins[bin]++; + stats->size_bins[bin]++; } else { if (s < 64) - vq->size_bins[0]++; + stats->size_bins[0]++; else if (s < 1519) - vq->size_bins[6]++; + stats->size_bins[6]++; else if (s >= 1519) - vq->size_bins[7]++; + stats->size_bins[7]++; } ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); if (is_multicast_ether_addr(ea)) { if (is_broadcast_ether_addr(ea)) - vq->broadcast++; + stats->broadcast++; else - vq->multicast++; + stats->multicast++; } } @@ -594,7 +617,8 @@ virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf) uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { - struct virtqueue *rxvq = rx_queue; + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; struct virtio_hw *hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; @@ -604,19 +628,19 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) uint32_t i, nb_enqueued; uint32_t hdr_size; - nb_used = VIRTQUEUE_NUSED(rxvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? 
num : VIRTIO_MBUF_BURST_SZ); if (likely(num > DESC_PER_CACHELINE)) - num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); - num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num); + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); - hw = rxvq->hw; + hw = vq->hw; nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; @@ -629,8 +653,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { PMD_RX_LOG(ERR, "Packet drop"); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; continue; } @@ -651,23 +675,23 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rx_pkts[nb_rx++] = rxm; - rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len; - virtio_update_packet_stats(rxvq, rxm); + rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rxm); } - rxvq->packets += nb_rx; + rxvq->stats.packets += nb_rx; /* Allocate new mbuf for the used descriptor */ error = ENOSPC; - while (likely(!virtqueue_full(rxvq))) { - new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); if (unlikely(new_mbuf == NULL)) { struct rte_eth_dev *dev = &rte_eth_devices[rxvq->port_id]; dev->data->rx_mbuf_alloc_failed++; break; } - error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); if (unlikely(error)) { rte_pktmbuf_free(new_mbuf); break; @@ -676,11 +700,11 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } if (likely(nb_enqueued)) { - vq_update_avail_idx(rxvq); + vq_update_avail_idx(vq); - if (unlikely(virtqueue_kick_prepare(rxvq))) { - virtqueue_notify(rxvq); - PMD_RX_LOG(DEBUG, "Notified\n"); + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); + PMD_RX_LOG(DEBUG, "Notified"); } } @@ -692,7 +716,8 @@ virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { - struct virtqueue *rxvq = rx_queue; + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; struct virtio_hw *hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; @@ -706,13 +731,13 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint32_t seg_res; uint32_t hdr_size; - nb_used = VIRTQUEUE_NUSED(rxvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); - PMD_RX_LOG(DEBUG, "used:%d\n", nb_used); + PMD_RX_LOG(DEBUG, "used:%d", nb_used); - hw = rxvq->hw; + hw = vq->hw; nb_rx = 0; i = 0; nb_enqueued = 0; @@ -727,22 +752,22 @@ virtio_recv_mergeable_pkts(void *rx_queue, if (nb_rx == nb_pkts) break; - num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1); + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); if (num != 1) continue; i++; - PMD_RX_LOG(DEBUG, "dequeue:%d\n", num); - PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]); + PMD_RX_LOG(DEBUG, "dequeue:%d", num); + PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); rxm = rcv_pkts[0]; if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { - PMD_RX_LOG(ERR, "Packet drop\n"); + PMD_RX_LOG(ERR, "Packet drop"); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; continue; } @@ -773,18 +798,18 @@ virtio_recv_mergeable_pkts(void *rx_queue, */ uint16_t rcv_cnt = RTE_MIN(seg_res, 
RTE_DIM(rcv_pkts)); - if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) { + if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { uint32_t rx_num = - virtqueue_dequeue_burst_rx(rxvq, + virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, rcv_cnt); i += rx_num; rcv_cnt = rx_num; } else { PMD_RX_LOG(ERR, - "No enough segments for packet.\n"); + "No enough segments for packet."); nb_enqueued++; - virtio_discard_rxbuf(rxvq, rxm); - rxvq->errors++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; break; } @@ -814,24 +839,24 @@ virtio_recv_mergeable_pkts(void *rx_queue, VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], rx_pkts[nb_rx]->data_len); - rxvq->bytes += rx_pkts[nb_rx]->pkt_len; - virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]); + rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); nb_rx++; } - rxvq->packets += nb_rx; + rxvq->stats.packets += nb_rx; /* Allocate new mbuf for the used descriptor */ error = ENOSPC; - while (likely(!virtqueue_full(rxvq))) { - new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); if (unlikely(new_mbuf == NULL)) { struct rte_eth_dev *dev = &rte_eth_devices[rxvq->port_id]; dev->data->rx_mbuf_alloc_failed++; break; } - error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); if (unlikely(error)) { rte_pktmbuf_free(new_mbuf); break; @@ -840,10 +865,10 @@ virtio_recv_mergeable_pkts(void *rx_queue, } if (likely(nb_enqueued)) { - vq_update_avail_idx(rxvq); + vq_update_avail_idx(vq); - if (unlikely(virtqueue_kick_prepare(rxvq))) { - virtqueue_notify(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); PMD_RX_LOG(DEBUG, "Notified"); } } @@ -854,8 +879,9 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { - struct virtqueue *txvq = tx_queue; - struct virtio_hw *hw = txvq->hw; + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; + struct virtio_hw *hw = vq->hw; uint16_t hdr_size = hw->vtnet_hdr_size; uint16_t nb_used, nb_tx; int error; @@ -864,11 +890,11 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) return nb_pkts; PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); - nb_used = VIRTQUEUE_NUSED(txvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); - if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh)) - virtio_xmit_cleanup(txvq, nb_used); + if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) + virtio_xmit_cleanup(vq, nb_used); for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { struct rte_mbuf *txm = tx_pkts[nb_tx]; @@ -886,6 +912,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) /* optimize ring usage */ if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && rte_mbuf_refcnt_read(txm) == 1 && + RTE_MBUF_DIRECT(txm) && txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size && rte_is_aligned(rte_pktmbuf_mtod(txm, char *), @@ -901,16 +928,16 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) * default => number of segments + 1 */ slots = use_indirect ? 
1 : (txm->nb_segs + !can_push); - need = slots - txvq->vq_free_cnt; + need = slots - vq->vq_free_cnt; /* Positive value indicates it need free vring descriptors */ if (unlikely(need > 0)) { - nb_used = VIRTQUEUE_NUSED(txvq); + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); need = RTE_MIN(need, (int)nb_used); - virtio_xmit_cleanup(txvq, need); - need = slots - txvq->vq_free_cnt; + virtio_xmit_cleanup(vq, need); + need = slots - vq->vq_free_cnt; if (unlikely(need > 0)) { PMD_TX_LOG(ERR, "No free tx descriptors to transmit"); @@ -921,17 +948,17 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) /* Enqueue Packet buffers */ virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); - txvq->bytes += txm->pkt_len; - virtio_update_packet_stats(txvq, txm); + txvq->stats.bytes += txm->pkt_len; + virtio_update_packet_stats(&txvq->stats, txm); } - txvq->packets += nb_tx; + txvq->stats.packets += nb_tx; if (likely(nb_tx)) { - vq_update_avail_idx(txvq); + vq_update_avail_idx(vq); - if (unlikely(virtqueue_kick_prepare(txvq))) { - virtqueue_notify(txvq); + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); PMD_TX_LOG(DEBUG, "Notified backend after xmit"); } } diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h index a76c3e52..058b56a1 100644 --- a/drivers/net/virtio/virtio_rxtx.h +++ b/drivers/net/virtio/virtio_rxtx.h @@ -31,11 +31,65 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifndef _VIRTIO_RXTX_H_ +#define _VIRTIO_RXTX_H_ + #define RTE_PMD_VIRTIO_RX_MAX_BURST 64 +struct virtnet_stats { + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint64_t multicast; + uint64_t broadcast; + /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ + uint64_t size_bins[8]; +}; + +struct virtnet_rx { + struct virtqueue *vq; + /* dummy mbuf, for wraparound when processing RX ring. */ + struct rte_mbuf fake_mbuf; + uint64_t mbuf_initializer; /**< value to init mbufs. */ + struct rte_mempool *mpool; /**< mempool for mbuf allocation */ + + uint16_t queue_id; /**< DPDK queue index. */ + uint8_t port_id; /**< Device port identifier. */ + + /* Statistics */ + struct virtnet_stats stats; + + const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ +}; + +struct virtnet_tx { + struct virtqueue *vq; + /**< memzone to populate hdr. */ + const struct rte_memzone *virtio_net_hdr_mz; + phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ + + uint16_t queue_id; /**< DPDK queue index. */ + uint8_t port_id; /**< Device port identifier. */ + + /* Statistics */ + struct virtnet_stats stats; + + const struct rte_memzone *mz; /**< mem zone to populate TX ring. */ +}; + +struct virtnet_ctl { + struct virtqueue *vq; + /**< memzone to populate hdr. */ + const struct rte_memzone *virtio_net_hdr_mz; + phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ + uint8_t port_id; /**< Device port identifier. */ + const struct rte_memzone *mz; /**< mem zone to populate RX ring. 
*/ +}; + #ifdef RTE_MACHINE_CPUFLAG_SSSE3 -int virtio_rxq_vec_setup(struct virtqueue *rxq); +int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, struct rte_mbuf *m); #endif +#endif /* _VIRTIO_RXTX_H_ */ diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c index 8f5293dd..242ad90d 100644 --- a/drivers/net/virtio/virtio_rxtx_simple.c +++ b/drivers/net/virtio/virtio_rxtx_simple.c @@ -80,8 +80,8 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, vq->sw_ring[desc_idx] = cookie; start_dp = vq->vq_ring.desc; - start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr + - RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size); + start_dp[desc_idx].addr = MBUF_DATA_DMA_ADDR(cookie, vq->offset) - + vq->hw->vtnet_hdr_size; start_dp[desc_idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; @@ -92,17 +92,18 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, } static inline void -virtio_rxq_rearm_vec(struct virtqueue *rxvq) +virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) { int i; uint16_t desc_idx; struct rte_mbuf **sw_ring; struct vring_desc *start_dp; int ret; + struct virtqueue *vq = rxvq->vq; - desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1); - sw_ring = &rxvq->sw_ring[desc_idx]; - start_dp = &rxvq->vq_ring.desc[desc_idx]; + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + sw_ring = &vq->sw_ring[desc_idx]; + start_dp = &vq->vq_ring.desc[desc_idx]; ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, RTE_VIRTIO_VPMD_RX_REARM_THRESH); @@ -119,15 +120,15 @@ virtio_rxq_rearm_vec(struct virtqueue *rxvq) *(uint64_t *)p = rxvq->mbuf_initializer; start_dp[i].addr = - (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr + - RTE_PKTMBUF_HEADROOM - rxvq->hw->vtnet_hdr_size); + MBUF_DATA_DMA_ADDR(sw_ring[i], vq->offset) - + vq->hw->vtnet_hdr_size; start_dp[i].len = sw_ring[i]->buf_len - - RTE_PKTMBUF_HEADROOM + rxvq->hw->vtnet_hdr_size; + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; } - rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; - rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; - vq_update_avail_idx(rxvq); + vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq_update_avail_idx(vq); } /* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) @@ -143,7 +144,8 @@ uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { - struct virtqueue *rxvq = rx_queue; + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; uint16_t nb_used; uint16_t desc_idx; struct vring_used_elem *rused; @@ -175,15 +177,14 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, len_adjust = _mm_set_epi16( 0, 0, 0, - (uint16_t)-rxvq->hw->vtnet_hdr_size, - 0, (uint16_t)-rxvq->hw->vtnet_hdr_size, + (uint16_t)-vq->hw->vtnet_hdr_size, + 0, (uint16_t)-vq->hw->vtnet_hdr_size, 0, 0); if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) return 0; - nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx - - rxvq->vq_used_cons_idx; + nb_used = VIRTQUEUE_NUSED(vq); rte_compiler_barrier(); @@ -193,17 +194,17 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); nb_used = RTE_MIN(nb_used, nb_pkts); - desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1)); - rused = &rxvq->vq_ring.used->ring[desc_idx]; - sw_ring = &rxvq->sw_ring[desc_idx]; - 
sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries]; + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; _mm_prefetch((const void *)rused, _MM_HINT_T0); - if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { virtio_rxq_rearm_vec(rxvq); - if (unlikely(virtqueue_kick_prepare(rxvq))) - virtqueue_notify(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); } for (nb_pkts_received = 0; @@ -286,9 +287,9 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, } } - rxvq->vq_used_cons_idx += nb_pkts_received; - rxvq->vq_free_cnt += nb_pkts_received; - rxvq->packets += nb_pkts_received; + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; return nb_pkts_received; } @@ -342,31 +343,32 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { - struct virtqueue *txvq = tx_queue; + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; uint16_t nb_used; uint16_t desc_idx; struct vring_desc *start_dp; uint16_t nb_tail, nb_commit; int i; - uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1; + uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1; - nb_used = VIRTQUEUE_NUSED(txvq); + nb_used = VIRTQUEUE_NUSED(vq); rte_compiler_barrier(); if (nb_used >= VIRTIO_TX_FREE_THRESH) - virtio_xmit_cleanup(tx_queue); + virtio_xmit_cleanup(vq); - nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts); - desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max); - start_dp = txvq->vq_ring.desc; + nb_commit = nb_pkts = RTE_MIN((vq->vq_free_cnt >> 1), nb_pkts); + desc_idx = (uint16_t)(vq->vq_avail_idx & desc_idx_max); + start_dp = vq->vq_ring.desc; nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx); if (nb_commit >= nb_tail) { for (i = 0; i < nb_tail; i++) - txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; for (i = 0; i < nb_tail; i++) { start_dp[desc_idx].addr = - rte_mbuf_data_dma_addr(*tx_pkts); + MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset); start_dp[desc_idx].len = (*tx_pkts)->pkt_len; tx_pkts++; desc_idx++; @@ -375,9 +377,10 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, desc_idx = 0; } for (i = 0; i < nb_commit; i++) - txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; for (i = 0; i < nb_commit; i++) { - start_dp[desc_idx].addr = rte_mbuf_data_dma_addr(*tx_pkts); + start_dp[desc_idx].addr = + MBUF_DATA_DMA_ADDR(*tx_pkts, vq->offset); start_dp[desc_idx].len = (*tx_pkts)->pkt_len; tx_pkts++; desc_idx++; @@ -385,21 +388,21 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, rte_compiler_barrier(); - txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); - txvq->vq_avail_idx += nb_pkts; - txvq->vq_ring.avail->idx = txvq->vq_avail_idx; - txvq->packets += nb_pkts; + vq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); + vq->vq_avail_idx += nb_pkts; + vq->vq_ring.avail->idx = vq->vq_avail_idx; + txvq->stats.packets += nb_pkts; if (likely(nb_pkts)) { - if (unlikely(virtqueue_kick_prepare(txvq))) - virtqueue_notify(txvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); } return nb_pkts; } int __attribute__((cold)) -virtio_rxq_vec_setup(struct virtqueue *rxq) +virtio_rxq_vec_setup(struct virtnet_rx *rxq) { 
uintptr_t p; struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 00000000..7adb55f5 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include <stdint.h> +#include <linux/types.h> +#include <linux/ioctl.h> + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. 
+ */ + uint64_t log_guest_addr; +}; + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +}; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t mmap_offset; +}; + +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) + +int vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg); +int vhost_user_setup(const char *path); +int vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable); + +#endif diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c new file mode 100644 index 00000000..a2b0687f --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -0,0 +1,426 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/un.h> +#include <string.h> +#include <errno.h> + +#include "vhost.h" + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + return r; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if (ret < sz_hdr) { + PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", + ret, sz_hdr); + goto fail; + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", + msg->flags, valid_flags); + goto fail; + } + + sz_payload = msg->size; + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if (ret < sz_payload) { + PMD_DRV_LOG(ERR, + "Failed to recv msg payload: %d instead of %d.", + ret, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. 
+ */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + return -1; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + PMD_DRV_LOG(ERR, "Failed to parse address"); + goto error; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') /** skip spaces */ + tmp++; + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) + *tail = '\0'; + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) + continue; + + str_start = str_underline - strlen("map"); + if (str_start < tmp) + continue; + + if (sscanf(str_start, "map_%d", &huge_index) != 1) + continue; + + if (idx >= max) { + PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); + goto error; + } + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + strcpy(huges[idx].path, tmp); + idx++; + } + + fclose(f); + return idx; + +error: + fclose(f); + return -1; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mr; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); + return -1; + } + + for (i = 0; i < num; ++i) { + mr = &msg->payload.memory.regions[i]; + mr->guest_phys_addr = huges[i].addr; /* use vaddr! 
*/ + mr->userspace_addr = huges[i].addr; + mr->memory_size = huges[i].size; + mr->mmap_offset = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static struct vhost_user_msg m; + +static const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + NULL, +}; + +int +vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len; + + RTE_SET_USED(m); + RTE_SET_USED(vhost_msg_strings); + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(m.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + if (prepare_vhost_memory_user(&msg, fds) < 0) + return -1; + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(m.payload.memory.nregions); + msg.size += sizeof(m.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(m.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.payload.u64); + if (file->fd > 0) + fds[fd_num++] = file->fd; + else + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + break; + + default: + PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); + return -1; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + return -1; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) + close(fds[i]); + + if (need_reply) { + if (vhost_user_read(vhostfd, &msg) < 0) { + PMD_DRV_LOG(ERR, "Received msg failed: %s", + strerror(errno)); + return -1; + } + + if (req != msg.request) { + 
PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + + switch (req) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.payload.u64)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.payload.state)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + default: + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * @param path + * - The path to vhost user unix socket file. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful, and it is the fd to vhostfd. + */ +int +vhost_user_setup(const char *path) +{ + int fd; + int flag; + struct sockaddr_un un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); + return -1; + } + + flag = fcntl(fd, F_GETFD); + fcntl(fd, F_SETFD, flag | FD_CLOEXEC); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); + close(fd); + return -1; + } + + return fd; +} + +int +vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(vhostfd, + VHOST_USER_SET_VRING_ENABLE, &state)) + return -1; + } + + return 0; +} diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 00000000..3d12a320 --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,333 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/eventfd.h> + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int callfd, kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = &dev->vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + /* May use invalid flag, but some backend leverages kickfd and callfd as + * criteria to judge if dev is alive. so finally we use real event_fd. + */ + callfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); + return -1; + } + kickfd = eventfd(0, O_CLOEXEC | O_NONBLOCK); + if (kickfd < 0) { + close(callfd); + PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); + return -1; + } + + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come + * firstly because vhost depends on this msg to allocate virtqueue + * pair. + */ + file.index = queue_sel; + file.fd = callfd; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_CALL, &file); + dev->callfds[queue_sel] = callfd; + + state.index = queue_sel; + state.num = vring->num; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_NUM, &state); + + state.num = 0; /* no reservation */ + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_BASE, &state); + + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_ADDR, &addr); + + /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes + * lastly because vhost depends on this msg to judge if + * virtio is ready. + */ + file.fd = kickfd; + vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_KICK, &file); + dev->kickfds[queue_sel] = kickfd; + + return 0; +} + +int +virtio_user_start_device(struct virtio_user_dev *dev) +{ + uint64_t features; + uint32_t i, queue_sel; + int ret; + + /* construct memory region inside each implementation */ + ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_MEM_TABLE, NULL); + if (ret < 0) + goto error; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX; + if (virtio_user_kick_queue(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "kick rx vq fails: %u", i); + goto error; + } + } + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX; + if (virtio_user_kick_queue(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "kick tx vq fails: %u", i); + goto error; + } + } + + /* After setup all virtqueues, we need to set_features so that these + * features can be set into each virtqueue in vhost side. 
And before + * that, make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is + * enabled, and VIRTIO_NET_F_MAC is stripped. + */ + features = dev->features; + if (dev->max_queue_pairs > 1) + features |= VHOST_USER_MQ; + features &= ~(1ull << VIRTIO_NET_F_MAC); + ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, &features); + if (ret < 0) + goto error; + PMD_DRV_LOG(INFO, "set features: %" PRIx64, features); + + return 0; +error: + /* TODO: free resource here or caller to check */ + return -1; +} + +int virtio_user_stop_device(struct virtio_user_dev *dev) +{ + return vhost_user_sock(dev->vhostfd, VHOST_USER_RESET_OWNER, NULL); +} + +static inline void +parse_mac(struct virtio_user_dev *dev, const char *mac) +{ + int i, r; + uint32_t tmp[ETHER_ADDR_LEN]; + + if (!mac) + return; + + r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0], + &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]); + if (r == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = (uint8_t)tmp[i]; + dev->mac_specified = 1; + } else { + /* ignore the wrong mac, use random mac */ + PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac); + } +} + +int +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac) +{ + strncpy(dev->path, path, PATH_MAX); + dev->max_queue_pairs = queues; + dev->queue_pairs = 1; /* mq disabled by default */ + dev->queue_size = queue_size; + dev->mac_specified = 0; + parse_mac(dev, mac); + dev->vhostfd = -1; + + dev->vhostfd = vhost_user_setup(dev->path); + if (dev->vhostfd < 0) { + PMD_INIT_LOG(ERR, "backend set up fails"); + return -1; + } + if (vhost_user_sock(dev->vhostfd, VHOST_USER_SET_OWNER, NULL) < 0) { + PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno)); + return -1; + } + + if (vhost_user_sock(dev->vhostfd, VHOST_USER_GET_FEATURES, + &dev->features) < 0) { + PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); + return -1; + } + if (dev->mac_specified) + dev->features |= (1ull << VIRTIO_NET_F_MAC); + + if (!cq) { + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } else { + /* vhost user backend does not need to know ctrl-q, so + * actually we need add this bit into features. However, + * DPDK vhost-user does send features with this bit, so we + * check it instead of OR it for now. 
+ */ + if (!(dev->features & (1ull << VIRTIO_NET_F_CTRL_VQ))) + PMD_INIT_LOG(INFO, "vhost does not support ctrl-q"); + } + + if (dev->max_queue_pairs > 1) { + if (!(dev->features & VHOST_USER_MQ)) { + PMD_INIT_LOG(ERR, "MQ not supported by the backend"); + return -1; + } + } + + return 0; +} + +void +virtio_user_dev_uninit(struct virtio_user_dev *dev) +{ + uint32_t i; + + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + close(dev->callfds[i]); + close(dev->kickfds[i]); + } + + close(dev->vhostfd); +} + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", + q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, + uint16_t idx_hdr) +{ + struct virtio_net_ctrl_hdr *hdr; + virtio_net_ctrl_ack status = ~0; + uint16_t i, idx_data, idx_status; + uint32_t n_descs = 0; + + /* locate desc for header, data, and status */ + idx_data = vring->desc[idx_hdr].next; + n_descs++; + + i = idx_data; + while (vring->desc[i].flags == VRING_DESC_F_NEXT) { + i = vring->desc[i].next; + n_descs++; + } + + /* locate desc for status */ + idx_status = i; + n_descs++; + + hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr; + if (hdr->class == VIRTIO_NET_CTRL_MQ && + hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + uint16_t queues; + + queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr; + status = virtio_user_handle_mq(dev, queues); + } + + /* Update status */ + *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status; + + return n_descs; +} + +void +virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +{ + uint16_t avail_idx, desc_idx; + struct vring_used_elem *uep; + uint32_t n_descs; + struct vring *vring = &dev->vrings[queue_idx]; + + /* Consume avail ring, using used ring idx as first one */ + while (vring->used->idx != vring->avail->idx) { + avail_idx = (vring->used->idx) & (vring->num - 1); + desc_idx = vring->avail->ring[avail_idx]; + + n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx); + + /* Update used ring */ + uep = &vring->used->ring[avail_idx]; + uep->id = avail_idx; + uep->len = n_descs; + + vring->used->idx++; + } +} diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h new file mode 100644 index 00000000..33690b5c --- /dev/null +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_USER_DEV_H +#define _VIRTIO_USER_DEV_H + +#include <limits.h> +#include "../virtio_pci.h" +#include "../virtio_ring.h" + +struct virtio_user_dev { + int vhostfd; + int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int mac_specified; + uint32_t max_queue_pairs; + uint32_t queue_pairs; + uint32_t queue_size; + uint64_t features; + uint8_t status; + uint8_t mac_addr[ETHER_ADDR_LEN]; + char path[PATH_MAX]; + struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; +}; + +int virtio_user_start_device(struct virtio_user_dev *dev); +int virtio_user_stop_device(struct virtio_user_dev *dev); +int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac); +void virtio_user_dev_uninit(struct virtio_user_dev *dev); +void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); +#endif diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 00000000..5ab24711 --- /dev/null +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,440 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rte_malloc.h> +#include <rte_kvargs.h> + +#include "virtio_ethdev.h" +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev) + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) + *(uint16_t *)dst = dev->max_queue_pairs; +} + +static void +virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if ((offset == offsetof(struct virtio_net_config, mac)) && + (length == ETHER_ADDR_LEN)) + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = ((const uint8_t *)src)[i]; + else + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + offset, length); +} + +static void +virtio_user_set_status(struct virtio_hw *hw, uint8_t status) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_start_device(dev); + dev->status = status; +} + +static void +virtio_user_reset(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + virtio_user_stop_device(dev); +} + +static uint8_t +virtio_user_get_status(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->status; +} + +static uint64_t +virtio_user_get_features(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->features; +} + +static void +virtio_user_set_features(struct virtio_hw *hw, uint64_t features) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + dev->features = features; +} + +static uint8_t +virtio_user_get_isr(struct virtio_hw *hw __rte_unused) +{ + /* When config interrupt happens, driver calls this function to query + * what kinds of change happen. Interrupt mode not supported for now. + */ + return 0; +} + +static uint16_t +virtio_user_set_config_irq(struct virtio_hw *hw __rte_unused, + uint16_t vec __rte_unused) +{ + return VIRTIO_MSI_NO_VECTOR; +} + +/* This function is to get the queue size, aka, number of descs, of a specified + * queue. Different with the VHOST_USER_GET_QUEUE_NUM, which is used to get the + * max supported queues. 
+ */ +static uint16_t +virtio_user_get_queue_num(struct virtio_hw *hw, uint16_t queue_id __rte_unused) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + /* Currently, each queue has same queue size */ + return dev->queue_size; +} + +static int +virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + uint16_t queue_idx = vq->vq_queue_index; + uint64_t desc_addr, avail_addr, used_addr; + + desc_addr = (uintptr_t)vq->vq_ring_virt_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + dev->vrings[queue_idx].num = vq->vq_nentries; + dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; + dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; + dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; + + return 0; +} + +static void +virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU + * correspondingly stops the ioeventfds, and reset the status of + * the device. + * For modern devices, set queue desc, avail, used in PCI bar to 0, + * not see any more behavior in QEMU. + * + * Here we just care about what information to deliver to vhost-user + * or vhost-kernel. So we just close ioeventfd for now. + */ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + close(dev->callfds[vq->vq_queue_index]); + close(dev->kickfds[vq->vq_queue_index]); +} + +static void +virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint64_t buf = 1; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (hw->cvq && (hw->cvq->vq == vq)) { + virtio_user_handle_cq(dev, vq->vq_queue_index); + return; + } + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) + PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + strerror(errno)); +} + +static const struct virtio_pci_ops virtio_user_ops = { + .read_dev_cfg = virtio_user_read_dev_config, + .write_dev_cfg = virtio_user_write_dev_config, + .reset = virtio_user_reset, + .get_status = virtio_user_get_status, + .set_status = virtio_user_set_status, + .get_features = virtio_user_get_features, + .set_features = virtio_user_set_features, + .get_isr = virtio_user_get_isr, + .set_config_irq = virtio_user_set_config_irq, + .get_queue_num = virtio_user_get_queue_num, + .setup_queue = virtio_user_setup_queue, + .del_queue = virtio_user_del_queue, + .notify_queue = virtio_user_notify_queue, +}; + +static const char *valid_args[] = { +#define VIRTIO_USER_ARG_QUEUES_NUM "queues" + VIRTIO_USER_ARG_QUEUES_NUM, +#define VIRTIO_USER_ARG_CQ_NUM "cq" + VIRTIO_USER_ARG_CQ_NUM, +#define VIRTIO_USER_ARG_MAC "mac" + VIRTIO_USER_ARG_MAC, +#define VIRTIO_USER_ARG_PATH "path" + VIRTIO_USER_ARG_PATH, +#define VIRTIO_USER_ARG_QUEUE_SIZE "queue_size" + VIRTIO_USER_ARG_QUEUE_SIZE, + NULL +}; + +#define VIRTIO_USER_DEF_CQ_EN 0 +#define VIRTIO_USER_DEF_Q_NUM 1 +#define VIRTIO_USER_DEF_Q_SZ 256 + +static int +get_string_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(char **)extra_args = strdup(value); + + return 0; +} + +static int +get_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(uint64_t *)extra_args = strtoull(value, NULL, 0); + + return 0; +} + 
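The two kvargs callbacks above are the only devargs converters this PMD needs; virtio_user_pmd_devinit() below simply counts each key and dispatches to them. As a minimal, self-contained sketch (not part of the patch: parse_example() is a hypothetical wrapper, while the rte_kvargs_* calls and the valid_args / VIRTIO_USER_ARG_* / VIRTIO_USER_DEF_Q_NUM names are the ones defined above), this is how such callbacks are driven:

/* Sketch only: drive get_string_arg()/get_integer_arg() with librte_kvargs
 * over a devargs string such as "path=/tmp/vhost.sock,queues=2".
 */
#include <stdlib.h>
#include <rte_kvargs.h>

static int
parse_example(const char *params)
{
	struct rte_kvargs *kvlist;
	uint64_t queues = VIRTIO_USER_DEF_Q_NUM;	/* default if key absent */
	char *path = NULL;
	int ret;

	kvlist = rte_kvargs_parse(params, valid_args);
	if (kvlist == NULL)
		return -1;

	/* each matching "key=value" pair invokes the callback once */
	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1)
		rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH,
				   &get_string_arg, &path);
	if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1)
		rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM,
				   &get_integer_arg, &queues);

	ret = (path != NULL) ? 0 : -1;	/* "path" is mandatory */

	rte_kvargs_free(kvlist);
	free(path);
	return ret;
}

At run time the params string comes from the application's --vdev devargs, for example --vdev='virtio-user0,path=/tmp/vhost.sock,queues=2,queue_size=256', where the vdev name prefix has to match the "virtio-user" driver name registered at the end of this file.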
+static struct rte_eth_dev * +virtio_user_eth_dev_alloc(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct rte_eth_dev_data *data; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); + return NULL; + } + + data = eth_dev->data; + + hw = rte_zmalloc(NULL, sizeof(*hw), 0); + if (!hw) { + PMD_INIT_LOG(ERR, "malloc virtio_hw failed"); + rte_eth_dev_release_port(eth_dev); + return NULL; + } + + dev = rte_zmalloc(NULL, sizeof(*dev), 0); + if (!dev) { + PMD_INIT_LOG(ERR, "malloc virtio_user_dev failed"); + rte_eth_dev_release_port(eth_dev); + rte_free(hw); + return NULL; + } + + hw->vtpci_ops = &virtio_user_ops; + hw->use_msix = 0; + hw->modern = 0; + hw->virtio_user_dev = dev; + data->dev_private = hw; + data->numa_node = SOCKET_ID_ANY; + data->kdrv = RTE_KDRV_NONE; + data->dev_flags = RTE_ETH_DEV_DETACHABLE; + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + return eth_dev; +} + +/* Dev initialization routine. Invoked once for each virtio vdev at + * EAL init time, see rte_eal_dev_init(). + * Returns 0 on success. + */ +static int +virtio_user_pmd_devinit(const char *name, const char *params) +{ + struct rte_kvargs *kvlist; + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + uint64_t queues = VIRTIO_USER_DEF_Q_NUM; + uint64_t cq = VIRTIO_USER_DEF_CQ_EN; + uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ; + char *path = NULL; + char *mac_addr = NULL; + int ret = -1; + + if (!params || params[0] == '\0') { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + kvlist = rte_kvargs_parse(params, valid_args); + if (!kvlist) { + PMD_INIT_LOG(ERR, "error when parsing param"); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, + &get_string_arg, &path); + else { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio-user\n", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, + &get_string_arg, &mac_addr); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_size); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, + &get_integer_arg, &queues); + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) + rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, + &get_integer_arg, &cq); + else if (queues > 1) + cq = 1; + + if (queues > 1 && cq == 0) { + PMD_INIT_LOG(ERR, "multi-q requires ctrl-q"); + goto end; + } + + eth_dev = virtio_user_eth_dev_alloc(name); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "virtio-user fails to alloc device"); + goto end; + } + + hw = eth_dev->data->dev_private; + if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, + queue_size, mac_addr) < 0) + goto end; + + /* previously called by rte_eal_pci_probe() for physical dev */ + if (eth_virtio_dev_init(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + goto end; + } + ret = 0; + +end: + if (path) + free(path); + if (mac_addr) + free(mac_addr); + return ret; +} + +/** Called by rte_eth_dev_detach() */ +static int +virtio_user_pmd_devuninit(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + if (!name) + return 
-EINVAL; + + PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name); + eth_dev = rte_eth_dev_allocated(name); + if (!eth_dev) + return -ENODEV; + + /* make sure the device is stopped, queues freed */ + rte_eth_dev_close(eth_dev->data->port_id); + + hw = eth_dev->data->dev_private; + dev = hw->virtio_user_dev; + virtio_user_dev_uninit(dev); + + rte_free(eth_dev->data->dev_private); + rte_free(eth_dev->data); + rte_eth_dev_release_port(eth_dev); + + return 0; +} + +static struct rte_driver virtio_user_driver = { + .name = "virtio-user", + .type = PMD_VDEV, + .init = virtio_user_pmd_devinit, + .uninit = virtio_user_pmd_devuninit, +}; + +PMD_REGISTER_DRIVER(virtio_user_driver); diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 4e9239e0..455aaafe 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -66,6 +66,14 @@ struct rte_mbuf; #define VIRTQUEUE_MAX_NAME_SZ 32 +#ifdef RTE_VIRTIO_USER +#define MBUF_DATA_DMA_ADDR(mb, offset) \ + ((uint64_t)((uintptr_t)(*(void **)((uintptr_t)mb + offset)) \ + + (mb)->data_off)) +#else /* RTE_VIRTIO_USER */ +#define MBUF_DATA_DMA_ADDR(mb, offset) rte_mbuf_data_dma_addr(mb) +#endif /* RTE_VIRTIO_USER */ + #define VTNET_SQ_RQ_QUEUE_IDX 0 #define VTNET_SQ_TQ_QUEUE_IDX 1 #define VTNET_SQ_CQ_QUEUE_IDX 2 @@ -153,23 +161,30 @@ struct virtio_pmd_ctrl { uint8_t data[VIRTIO_MAX_CTRL_DATA]; }; +struct vq_desc_extra { + void *cookie; + uint16_t ndescs; +}; + struct virtqueue { - struct virtio_hw *hw; /**< virtio_hw structure pointer. */ - const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ - const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */ - struct rte_mempool *mpool; /**< mempool for mbuf allocation */ - uint16_t queue_id; /**< DPDK queue index. */ - uint8_t port_id; /**< Device port identifier. */ - uint16_t vq_queue_index; /**< PCI queue index */ - - void *vq_ring_virt_mem; /**< linear address of vring*/ + struct virtio_hw *hw; /**< virtio_hw structure pointer. */ + struct vring vq_ring; /**< vring keeping desc, used and avail */ + /** + * Last consumed descriptor in the used table, + * trails vq_ring.used->idx. + */ + uint16_t vq_used_cons_idx; + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_free_cnt; /**< num of desc available */ + uint16_t vq_avail_idx; /**< sync until needed */ + uint16_t vq_free_thresh; /**< free threshold */ + + void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; - phys_addr_t vq_ring_mem; /**< physical address of vring */ - struct vring vq_ring; /**< vring keeping desc, used and avail */ - uint16_t vq_free_cnt; /**< num of desc available */ - uint16_t vq_nentries; /**< vring desc numbers */ - uint16_t vq_free_thresh; /**< free threshold */ + phys_addr_t vq_ring_mem; /**< physical address of vring */ + /**< use virtual address for virtio-user. */ + /** * Head of the free chain in the descriptor table. If * there are no free descriptors, this will be set to @@ -177,34 +192,12 @@ struct virtqueue { */ uint16_t vq_desc_head_idx; uint16_t vq_desc_tail_idx; - /** - * Last consumed descriptor in the used table, - * trails vq_ring.used->idx. - */ - uint16_t vq_used_cons_idx; - uint16_t vq_avail_idx; - uint64_t mbuf_initializer; /**< value to init mbufs. */ - phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ - - struct rte_mbuf **sw_ring; /**< RX software ring. */ - /* dummy mbuf, for wraparound when processing RX ring. 
*/ - struct rte_mbuf fake_mbuf; - - /* Statistics */ - uint64_t packets; - uint64_t bytes; - uint64_t errors; - uint64_t multicast; - uint64_t broadcast; - /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ - uint64_t size_bins[8]; - - uint16_t *notify_addr; - - struct vq_desc_extra { - void *cookie; - uint16_t ndescs; - } vq_descx[0]; + uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t offset; /**< relative offset to obtain addr in mbuf */ + uint16_t *notify_addr; + int configured; + struct rte_mbuf **sw_ring; /**< RX software ring. */ + struct vq_desc_extra vq_descx[0]; }; /* If multiqueue is provided by host, then we suppport it. */ @@ -302,7 +295,8 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) * descriptor. */ avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); - vq->vq_ring.avail->ring[avail_idx] = desc_idx; + if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx)) + vq->vq_ring.avail->ring[avail_idx] = desc_idx; vq->vq_avail_idx++; } |
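The MBUF_DATA_DMA_ADDR() macro added at the top of this header, together with the new offset field in struct virtqueue, is what lets the same datapath serve both backends: a real PCI device needs the mbuf's physical address, while the vhost-user backend runs in the same process and can be handed the virtual buf_addr instead. A minimal sketch of the intended selection (an illustration only, since the actual assignment happens in the queue setup code that is not part of this hunk; example_choose_addr_offset() and is_virtio_user are hypothetical names):

/* Sketch: choose vq->offset so that MBUF_DATA_DMA_ADDR(mb, vq->offset)
 * resolves to buf_physaddr for a PCI device, or to buf_addr for the
 * virtio-user backend.  Assumes virtqueue.h and rte_mbuf.h are included.
 */
#include <stddef.h>

static inline void
example_choose_addr_offset(struct virtqueue *vq, int is_virtio_user)
{
	if (is_virtio_user)
		vq->offset = offsetof(struct rte_mbuf, buf_addr);
	else
		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
}

The conditional store added to vq_update_avail_ring() in the final hunk serves a related goal of keeping the fast path cheap: when the avail ring entry already holds the desired descriptor index, which is the common case for the fixed descriptor layout used by the simple rx/tx routines, skipping the write presumably avoids dirtying that cache line.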