author    | Luca Boccassi <luca.boccassi@gmail.com> | 2018-08-14 18:52:30 +0100
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-08-14 18:53:17 +0100
commit    | b63264c8342e6a1b6971c79550d2af2024b6a4de (patch)
tree      | 83114aac64286fe616506c0b3dfaec2ab86ef835 /lib/librte_vhost
parent    | ca33590b6af032bff57d9cc70455660466a654b2 (diff)
New upstream version 18.08
Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_vhost')
-rw-r--r-- | lib/librte_vhost/Makefile | 13
-rw-r--r-- | lib/librte_vhost/fd_man.c | 98
-rw-r--r-- | lib/librte_vhost/fd_man.h | 17
-rw-r--r-- | lib/librte_vhost/iotlb.c | 10
-rw-r--r-- | lib/librte_vhost/iotlb.h | 2
-rw-r--r-- | lib/librte_vhost/meson.build | 11
-rw-r--r-- | lib/librte_vhost/rte_vdpa.h | 87
-rw-r--r-- | lib/librte_vhost/rte_vhost.h | 212
-rw-r--r-- | lib/librte_vhost/rte_vhost_crypto.h | 109
-rw-r--r-- | lib/librte_vhost/rte_vhost_version.map | 24
-rw-r--r-- | lib/librte_vhost/socket.c | 288
-rw-r--r-- | lib/librte_vhost/vdpa.c | 115
-rw-r--r-- | lib/librte_vhost/vhost.c | 312
-rw-r--r-- | lib/librte_vhost/vhost.h | 358
-rw-r--r-- | lib/librte_vhost/vhost_crypto.c | 1372
-rw-r--r-- | lib/librte_vhost/vhost_user.c | 581
-rw-r--r-- | lib/librte_vhost/vhost_user.h | 56
-rw-r--r-- | lib/librte_vhost/virtio_crypto.h | 422
-rw-r--r-- | lib/librte_vhost/virtio_net.c | 1460
19 files changed, 4758 insertions, 789 deletions
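Among other things, the diff below adds an experimental vDPA plumbing layer (rte_vdpa.h, vdpa.c) and new socket-level APIs in rte_vhost.h. As a reading aid only — this sketch is not part of the commit — the snippet shows how an application might bind a vhost-user socket to an already-registered vDPA device and query the resulting capabilities, using only functions visible in the diff; the function name `attach_vdpa_backend`, the socket path and the PCI address are hypothetical placeholders.

```c
#include <inttypes.h>
#include <stdio.h>

#include <rte_vdpa.h>
#include <rte_vhost.h>

/* Sketch: attach a vhost-user socket to a vDPA backend (18.08 experimental APIs). */
static int
attach_vdpa_backend(const char *path, const struct rte_pci_addr *pci)
{
	struct rte_vdpa_dev_addr addr = {
		.type = PCI_ADDR,
		.pci_addr = *pci,
	};
	uint64_t protocol_features = 0;
	uint32_t queue_num = 0;
	int did;

	/* The vDPA driver is assumed to have called rte_vdpa_register_device() already. */
	did = rte_vdpa_find_device_id(&addr);
	if (did < 0)
		return -1;

	if (rte_vhost_driver_register(path, 0) < 0)
		return -1;

	/* Pin this socket to the vDPA backend; it then accepts a single connection. */
	if (rte_vhost_driver_attach_vdpa_device(path, did) < 0)
		return -1;

	/* Advertised capabilities are now masked by what the vDPA device reports. */
	if (rte_vhost_driver_get_protocol_features(path, &protocol_features) == 0 &&
	    rte_vhost_driver_get_queue_num(path, &queue_num) == 0)
		printf("vdpa dev %d: protocol features 0x%" PRIx64 ", max queues %u\n",
		       did, protocol_features, queue_num);

	/* A real application would also register its vhost_device_ops before starting. */
	return rte_vhost_driver_start(path);
}
```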
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index 5d6c6aba..de431fbb 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -18,13 +18,20 @@ LDLIBS += -lpthread ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif -LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \ + -lrte_cryptodev -lrte_hash # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \ - vhost_user.c virtio_net.c + vhost_user.c virtio_net.c vdpa.c # install includes -SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h + +# only compile vhost crypto when cryptodev is enabled +ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h +endif include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 181711c2..38347ab1 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -16,6 +16,9 @@ #include "fd_man.h" + +#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1 + #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) static int @@ -171,6 +174,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return directly -2. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return true. + */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in @@ -258,7 +293,7 @@ fdset_event_dispatch(void *arg) * because the fd is closed in the cb, * the old fd val could be reused by when creates new * listen fd in another thread, we couldn't call - * fd_set_del. + * fdset_del. 
*/ if (remove1 || remove2) { pfdentry->fd = -1; @@ -272,3 +307,64 @@ fdset_event_dispatch(void *arg) return NULL; } + +static void +fdset_pipe_read_cb(int readfd, void *dat __rte_unused, + int *remove __rte_unused) +{ + char charbuf[16]; + int r = read(readfd, charbuf, sizeof(charbuf)); + /* + * Just an optimization, we don't care if read() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); +} + +void +fdset_pipe_uninit(struct fdset *fdset) +{ + fdset_del(fdset, fdset->u.readfd); + close(fdset->u.readfd); + close(fdset->u.writefd); +} + +int +fdset_pipe_init(struct fdset *fdset) +{ + int ret; + + if (pipe(fdset->u.pipefd) < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + ret = fdset_add(fdset, fdset->u.readfd, + fdset_pipe_read_cb, NULL, NULL); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_FDMAN, + "failed to add pipe readfd %d into vhost server fdset\n", + fdset->u.readfd); + + fdset_pipe_uninit(fdset); + return -1; + } + + return 0; +} + +void +fdset_pipe_notify(struct fdset *fdset) +{ + int r = write(fdset->u.writefd, "1", 1); + /* + * Just an optimization, we don't care if write() failed + * so ignore explicitly its return value to make the + * compiler happy + */ + RTE_SET_USED(r); + +} diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 3a9276c3..3331bcd9 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -25,6 +25,16 @@ struct fdset { struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; int num; /* current fd number of this fdset */ + + union pipefds { + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; + } u; }; @@ -34,7 +44,14 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); +int fdset_pipe_init(struct fdset *fdset); + +void fdset_pipe_uninit(struct fdset *fdset); + +void fdset_pipe_notify(struct fdset *fdset); + #endif diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c index c11ebcaa..c6354fef 100644 --- a/lib/librte_vhost/iotlb.c +++ b/lib/librte_vhost/iotlb.c @@ -303,6 +303,13 @@ out: return vva; } +void +vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq) +{ + vhost_user_iotlb_cache_remove_all(vq); + vhost_user_iotlb_pending_remove_all(vq); +} + int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) { @@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) * The cache has already been initialized, * just drop all cached and pending entries. 
*/ - vhost_user_iotlb_cache_remove_all(vq); - vhost_user_iotlb_pending_remove_all(vq); + vhost_user_iotlb_flush_all(vq); } #ifdef RTE_LIBRTE_VHOST_NUMA diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h index e7083e37..60b9e4c5 100644 --- a/lib/librte_vhost/iotlb.h +++ b/lib/librte_vhost/iotlb.h @@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova, uint8_t perm); void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova, uint64_t size, uint8_t perm); - +void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq); int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index); #endif /* _VHOST_IOTLB_H_ */ diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build index 9e8c0e76..bd62e0e3 100644 --- a/lib/librte_vhost/meson.build +++ b/lib/librte_vhost/meson.build @@ -1,5 +1,5 @@ # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel Corporation +# Copyright(c) 2017-2018 Intel Corporation if host_machine.system() != 'linux' build = false @@ -9,7 +9,8 @@ if has_libnuma == 1 endif version = 4 allow_experimental_apis = true -sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vhost.c', 'vhost_user.c', - 'virtio_net.c') -headers = files('rte_vhost.h') -deps += ['ethdev'] +sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c', + 'vhost.c', 'vhost_user.c', + 'virtio_net.c', 'vhost_crypto.c') +headers = files('rte_vhost.h', 'rte_vdpa.h', 'rte_vhost_crypto.h') +deps += ['ethdev', 'cryptodev', 'hash', 'pci'] diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h new file mode 100644 index 00000000..90465ca2 --- /dev/null +++ b/lib/librte_vhost/rte_vdpa.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_VDPA_H_ +#define _RTE_VDPA_H_ + +/** + * @file + * + * Device specific vhost lib + */ + +#include <rte_pci.h> +#include "rte_vhost.h" + +#define MAX_VDPA_NAME_LEN 128 + +enum vdpa_addr_type { + PCI_ADDR, + VDPA_ADDR_MAX +}; + +struct rte_vdpa_dev_addr { + enum vdpa_addr_type type; + union { + uint8_t __dummy[64]; + struct rte_pci_addr pci_addr; + }; +}; + +struct rte_vdpa_dev_ops { + /* Get capabilities of this device */ + int (*get_queue_num)(int did, uint32_t *queue_num); + int (*get_features)(int did, uint64_t *features); + int (*get_protocol_features)(int did, uint64_t *protocol_features); + + /* Driver configure/close the device */ + int (*dev_conf)(int vid); + int (*dev_close)(int vid); + + /* Enable/disable this vring */ + int (*set_vring_state)(int vid, int vring, int state); + + /* Set features when changed */ + int (*set_features)(int vid); + + /* Destination operations when migration done */ + int (*migration_done)(int vid); + + /* Get the vfio group fd */ + int (*get_vfio_group_fd)(int vid); + + /* Get the vfio device fd */ + int (*get_vfio_device_fd)(int vid); + + /* Get the notify area info of the queue */ + int (*get_notify_area)(int vid, int qid, + uint64_t *offset, uint64_t *size); + + /* Reserved for future extension */ + void *reserved[5]; +}; + +struct rte_vdpa_device { + struct rte_vdpa_dev_addr addr; + struct rte_vdpa_dev_ops *ops; +} __rte_cache_aligned; + +/* Register a vdpa device, return did if successful, -1 on failure */ +int __rte_experimental +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops); + +/* Unregister a vdpa device, return -1 on failure */ +int __rte_experimental +rte_vdpa_unregister_device(int did); + +/* Find did of a vdpa 
device, return -1 on failure */ +int __rte_experimental +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr); + +/* Find a vdpa device based on did */ +struct rte_vdpa_device * __rte_experimental +rte_vdpa_get_device(int did); + +#endif /* _RTE_VDPA_H_ */ diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index d3320699..b02673d4 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -29,6 +29,48 @@ extern "C" { #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) +/** Protocol features. */ +#ifndef VHOST_USER_PROTOCOL_F_MQ +#define VHOST_USER_PROTOCOL_F_MQ 0 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_RARP +#define VHOST_USER_PROTOCOL_F_RARP 2 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK +#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_NET_MTU +#define VHOST_USER_PROTOCOL_F_NET_MTU 4 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION +#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD +#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 +#endif + +#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER +#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 +#endif + +/** Indicate whether protocol features negotiation is supported. */ +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + /** * Information relating to memory regions including offsets to * addresses in QEMUs memory file. @@ -90,6 +132,11 @@ struct vhost_device_ops { /** * Convert guest physical address to host virtual address * + * This function is deprecated because unsafe. + * New rte_vhost_va_from_guest_pa() should be used instead to ensure + * guest physical ranges are fully and contiguously mapped into + * process virtual address space. + * * @param mem * the guest memory regions * @param gpa @@ -97,6 +144,7 @@ struct vhost_device_ops { * @return * the host virtual address on success, 0 on failure */ +__rte_deprecated static __rte_always_inline uint64_t rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) { @@ -115,6 +163,46 @@ rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa) return 0; } +/** + * Convert guest physical address to host virtual address safely + * + * This variant of rte_vhost_gpa_to_vva() takes care all the + * requested length is mapped and contiguous in process address + * space. 
+ * + * @param mem + * the guest memory regions + * @param gpa + * the guest physical address for querying + * @param len + * the size of the requested area to map, updated with actual size mapped + * @return + * the host virtual address on success, 0 on failure + */ +static __rte_always_inline uint64_t +rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem, + uint64_t gpa, uint64_t *len) +{ + struct rte_vhost_mem_region *r; + uint32_t i; + + for (i = 0; i < mem->nregions; i++) { + r = &mem->regions[i]; + if (gpa >= r->guest_phys_addr && + gpa < r->guest_phys_addr + r->size) { + + if (unlikely(*len > r->guest_phys_addr + r->size - gpa)) + *len = r->guest_phys_addr + r->size - gpa; + + return gpa - r->guest_phys_addr + + r->host_user_addr; + } + } + *len = 0; + + return 0; +} + #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL)) /** @@ -170,6 +258,41 @@ int rte_vhost_driver_register(const char *path, uint64_t flags); int rte_vhost_driver_unregister(const char *path); /** + * Set the vdpa device id, enforce single connection per socket + * + * @param path + * The vhost-user socket file path + * @param did + * Device id + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_attach_vdpa_device(const char *path, int did); + +/** + * Unset the vdpa device id + * + * @param path + * The vhost-user socket file path + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_detach_vdpa_device(const char *path); + +/** + * Get the device id + * + * @param path + * The vhost-user socket file path + * @return + * Device id, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_vdpa_device_id(const char *path); + +/** * Set the feature bits the vhost-user driver supports. * * @param path @@ -225,6 +348,33 @@ int rte_vhost_driver_disable_features(const char *path, uint64_t features); int rte_vhost_driver_get_features(const char *path, uint64_t *features); /** + * Get the protocol feature bits before feature negotiation. + * + * @param path + * The vhost-user socket file path + * @param protocol_features + * A pointer to store the queried protocol feature bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features); + +/** + * Get the queue number bits before feature negotiation. 
+ * + * @param path + * The vhost-user socket file path + * @param queue_num + * A pointer to store the queried queue number bits + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num); + +/** * Get the feature bits after negotiation * * @param vid @@ -434,6 +584,68 @@ int rte_vhost_vring_call(int vid, uint16_t vring_idx); */ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid); +/** + * Get log base and log size of the vhost device + * + * @param vid + * vhost device ID + * @param log_base + * vhost log base + * @param log_size + * vhost log size + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size); + +/** + * Get last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * vhost last_avail_idx to get + * @param last_used_idx + * vhost last_used_idx to get + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx); + +/** + * Set last_avail/used_idx of the vhost virtqueue + * + * @param vid + * vhost device ID + * @param queue_id + * vhost queue index + * @param last_avail_idx + * last_avail_idx to set + * @param last_used_idx + * last_used_idx to set + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx); + +/** + * Get vdpa device id for vhost device. + * + * @param vid + * vhost device id + * @return + * device id + */ +int __rte_experimental +rte_vhost_get_vdpa_device_id(int vid); + #ifdef __cplusplus } #endif diff --git a/lib/librte_vhost/rte_vhost_crypto.h b/lib/librte_vhost/rte_vhost_crypto.h new file mode 100644 index 00000000..f9fbc054 --- /dev/null +++ b/lib/librte_vhost/rte_vhost_crypto.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ + +#ifndef _VHOST_CRYPTO_H_ +#define _VHOST_CRYPTO_H_ + +#define VHOST_CRYPTO_MBUF_POOL_SIZE (8192) +#define VHOST_CRYPTO_MAX_BURST_SIZE (64) +#define VHOST_CRYPTO_SESSION_MAP_ENTRIES (1024) /**< Max nb sessions */ +/** max nb virtual queues in a burst for finalizing*/ +#define VIRTIO_CRYPTO_MAX_NUM_BURST_VQS (64) + +enum rte_vhost_crypto_zero_copy { + RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE = 0, + RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE = 1, + RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS +}; + +/** + * Create Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @param cryptodev_id + * The identifier of DPDK Cryptodev, the same cryptodev_id can be assigned to + * multiple Vhost-crypto devices. + * @param sess_pool + * The pointer to the created cryptodev session pool with the private data size + * matches the target DPDK Cryptodev. + * @param socket_id + * NUMA Socket ID to allocate resources on. * + * @return + * 0 if the Vhost Crypto Instance is created successfully. + * Negative integer if otherwise + */ +int __rte_experimental +rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + struct rte_mempool *sess_pool, int socket_id); + +/** + * Free the Vhost-crypto instance + * + * @param vid + * The identifier of the vhost device. + * @return + * 0 if the Vhost Crypto Instance is created successfully. + * Negative integer if otherwise. 
+ */ +int __rte_experimental +rte_vhost_crypto_free(int vid); + +/** + * Enable or disable zero copy feature + * + * @param vid + * The identifier of the vhost device. + * @param option + * Flag of zero copy feature. + * @return + * 0 if completed successfully. + * Negative integer if otherwise. + */ +int __rte_experimental +rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option); + +/** + * Fetch a number of vring descriptors from virt-queue and translate to DPDK + * crypto operations. After this function is executed, the user can enqueue + * the processed ops to the target cryptodev. + * + * @param vid + * The identifier of the vhost device. + * @param qid + * Virtio queue index. + * @param ops + * The address of an array of pointers to *rte_crypto_op* structures that must + * be large enough to store *nb_ops* pointers in it. + * @param nb_ops + * The maximum number of operations to be fetched and translated. + * @return + * The number of fetched and processed vhost crypto request operations. + */ +uint16_t __rte_experimental +rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops); +/** + * Finalize the dequeued crypto ops. After the translated crypto ops are + * dequeued from the cryptodev, this function shall be called to write the + * processed data back to the vring descriptor (if no-copy is turned off). + * + * @param ops + * The address of an array of *rte_crypto_op* structure that was dequeued + * from cryptodev. + * @param nb_ops + * The number of operations contained in the array. + * @callfds + * The callfd number(s) contained in this burst, this shall be an array with + * no less than VIRTIO_CRYPTO_MAX_NUM_BURST_VQS elements. + * @nb_callfds + * The number of call_fd numbers exist in the callfds. + * @return + * The number of ops processed. + */ +uint16_t __rte_experimental +rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds); + +#endif /**< _VHOST_CRYPTO_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index df010312..da220dd0 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -59,3 +59,27 @@ DPDK_18.02 { rte_vhost_vring_call; } DPDK_17.08; + +EXPERIMENTAL { + global: + + rte_vdpa_register_device; + rte_vdpa_unregister_device; + rte_vdpa_find_device_id; + rte_vdpa_get_device; + rte_vhost_driver_attach_vdpa_device; + rte_vhost_driver_detach_vdpa_device; + rte_vhost_driver_get_vdpa_device_id; + rte_vhost_get_vdpa_device_id; + rte_vhost_driver_get_protocol_features; + rte_vhost_driver_get_queue_num; + rte_vhost_get_log_base; + rte_vhost_get_vring_base; + rte_vhost_set_vring_base; + rte_vhost_crypto_create; + rte_vhost_crypto_free; + rte_vhost_crypto_fetch_requests; + rte_vhost_crypto_finalize_requests; + rte_vhost_crypto_set_zero_copy; + rte_vhost_va_from_guest_pa; +}; diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 83befdce..d6303174 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -4,7 +4,6 @@ #include <stdint.h> #include <stdio.h> -#include <stdbool.h> #include <limits.h> #include <stdlib.h> #include <unistd.h> @@ -52,6 +51,13 @@ struct vhost_user_socket { uint64_t supported_features; uint64_t features; + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + * If valid, one socket can have 1 connection only. 
+ */ + int vdpa_dev_id; + struct vhost_device_ops const *notify_ops; }; @@ -97,6 +103,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) size_t fdsize = fd_num * sizeof(int); char control[CMSG_SPACE(fdsize)]; struct cmsghdr *cmsg; + int got_fds = 0; int ret; memset(&msgh, 0, sizeof(msgh)); @@ -123,11 +130,16 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) cmsg = CMSG_NXTHDR(&msgh, cmsg)) { if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SCM_RIGHTS)) { - memcpy(fds, CMSG_DATA(cmsg), fdsize); + got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); break; } } + /* Clear out unused file descriptors */ + while (got_fds < fd_num) + fds[got_fds++] = -1; + return ret; } @@ -153,6 +165,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) msgh.msg_control = control; msgh.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; @@ -163,7 +180,7 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) } do { - ret = sendmsg(sockfd, &msgh, 0); + ret = sendmsg(sockfd, &msgh, MSG_NOSIGNAL); } while (ret < 0 && errno == EINTR); if (ret < 0) { @@ -182,6 +199,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) struct vhost_user_connection *conn; int ret; + if (vsocket == NULL) + return; + conn = malloc(sizeof(*conn)); if (conn == NULL) { close(fd); @@ -198,6 +218,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); + vhost_attach_vdpa_device(vid, vsocket->vdpa_dev_id); + if (vsocket->dequeue_zero_copy) vhost_enable_dequeue_zero_copy(vid); @@ -232,6 +254,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) pthread_mutex_lock(&vsocket->conn_mutex); TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); pthread_mutex_unlock(&vsocket->conn_mutex); + + fdset_pipe_notify(&vhost_user.fdset); return; err: @@ -318,6 +342,16 @@ vhost_user_start_server(struct vhost_user_socket *vsocket) int fd = vsocket->socket_fd; const char *path = vsocket->path; + /* + * bind () may fail if the socket file with the same name already + * exists. But the library obviously should not delete the file + * provided by the user, since we can not be sure that it is not + * being used by other applications. Moreover, many applications form + * socket names based on user input, which is prone to errors. + * + * The user must ensure that the socket does not exist before + * registering the vhost driver in server mode. 
+ */ ret = bind(fd, (struct sockaddr *)&vsocket->un, sizeof(vsocket->un)); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, @@ -436,7 +470,6 @@ static int vhost_user_reconnect_init(void) { int ret; - char thread_name[RTE_MAX_THREAD_NAME_LEN]; ret = pthread_mutex_init(&reconn_list.mutex, NULL); if (ret < 0) { @@ -445,7 +478,7 @@ vhost_user_reconnect_init(void) } TAILQ_INIT(&reconn_list.head); - ret = pthread_create(&reconn_tid, NULL, + ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, vhost_user_client_reconnect, NULL); if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread"); @@ -453,14 +486,6 @@ vhost_user_reconnect_init(void) RTE_LOG(ERR, VHOST_CONFIG, "failed to destroy reconnect mutex"); } - } else { - snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, - "vhost-reconn"); - - if (rte_thread_setname(reconn_tid, thread_name)) { - RTE_LOG(DEBUG, VHOST_CONFIG, - "failed to set reconnect thread name"); - } } return ret; @@ -524,6 +549,52 @@ find_vhost_user_socket(const char *path) } int +rte_vhost_driver_attach_vdpa_device(const char *path, int did) +{ + struct vhost_user_socket *vsocket; + + if (rte_vdpa_get_device(did) == NULL) + return -1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = did; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_detach_vdpa_device(const char *path) +{ + struct vhost_user_socket *vsocket; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + vsocket->vdpa_dev_id = -1; + pthread_mutex_unlock(&vhost_user.mutex); + + return vsocket ? 0 : -1; +} + +int +rte_vhost_driver_get_vdpa_device_id(const char *path) +{ + struct vhost_user_socket *vsocket; + int did = -1; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (vsocket) + did = vsocket->vdpa_dev_id; + pthread_mutex_unlock(&vhost_user.mutex); + + return did; +} + +int rte_vhost_driver_disable_features(const char *path, uint64_t features) { struct vhost_user_socket *vsocket; @@ -591,19 +662,136 @@ int rte_vhost_driver_get_features(const char *path, uint64_t *features) { struct vhost_user_socket *vsocket; + uint64_t vdpa_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; pthread_mutex_lock(&vhost_user.mutex); vsocket = find_vhost_user_socket(path); - if (vsocket) + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_features) { *features = vsocket->features; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_features(did, &vdpa_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *features = vsocket->features & vdpa_features; + +unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +int +rte_vhost_driver_get_protocol_features(const char *path, + uint64_t *protocol_features) +{ + struct vhost_user_socket *vsocket; + uint64_t vdpa_protocol_features; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); if (!vsocket) { RTE_LOG(ERR, VHOST_CONFIG, "socket file %s is not registered yet.\n", path); - return -1; - } else { - return 0; + ret = -1; + goto unlock_exit; 
+ } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { + *protocol_features = VHOST_USER_PROTOCOL_FEATURES; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_protocol_features(did, + &vdpa_protocol_features) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa protocol features " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *protocol_features = VHOST_USER_PROTOCOL_FEATURES + & vdpa_protocol_features; + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +int +rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num) +{ + struct vhost_user_socket *vsocket; + uint32_t vdpa_queue_num; + struct rte_vdpa_device *vdpa_dev; + int did = -1; + int ret = 0; + + pthread_mutex_lock(&vhost_user.mutex); + vsocket = find_vhost_user_socket(path); + if (!vsocket) { + RTE_LOG(ERR, VHOST_CONFIG, + "socket file %s is not registered yet.\n", path); + ret = -1; + goto unlock_exit; + } + + did = vsocket->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev || !vdpa_dev->ops->get_queue_num) { + *queue_num = VHOST_MAX_QUEUE_PAIRS; + goto unlock_exit; + } + + if (vdpa_dev->ops->get_queue_num(did, &vdpa_queue_num) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to get vdpa queue number " + "for socket file %s.\n", path); + ret = -1; + goto unlock_exit; + } + + *queue_num = RTE_MIN((uint32_t)VHOST_MAX_QUEUE_PAIRS, vdpa_queue_num); + +unlock_exit: + pthread_mutex_unlock(&vhost_user.mutex); + return ret; +} + +static void +vhost_user_socket_mem_free(struct vhost_user_socket *vsocket) +{ + if (vsocket && vsocket->path) { + free(vsocket->path); + vsocket->path = NULL; + } + + if (vsocket) { + free(vsocket); + vsocket = NULL; } } @@ -637,7 +825,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) if (vsocket->path == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "error: failed to copy socket path string\n"); - free(vsocket); + vhost_user_socket_mem_free(vsocket); goto out; } TAILQ_INIT(&vsocket->conn_list); @@ -665,6 +853,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + /* Dequeue zero copy can't assure descriptors returned in order */ + if (vsocket->dequeue_zero_copy) { + vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); + vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); + } + if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); @@ -695,8 +889,7 @@ out_mutex: "error: failed to destroy connection mutex\n"); } out_free: - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); out: pthread_mutex_unlock(&vhost_user.mutex); @@ -743,21 +936,25 @@ rte_vhost_driver_unregister(const char *path) struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; if (!strcmp(vsocket->path, path)) { - if (vsocket->is_server) { - fdset_del(&vhost_user.fdset, vsocket->socket_fd); - close(vsocket->socket_fd); - unlink(path); - } else if (vsocket->reconnect) { - vhost_user_remove_reconnect(vsocket); - } - +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may 
use the conn_mutex lock. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path); @@ -768,9 +965,17 @@ rte_vhost_driver_unregister(const char *path) } pthread_mutex_unlock(&vsocket->conn_mutex); + if (vsocket->is_server) { + fdset_del(&vhost_user.fdset, + vsocket->socket_fd); + close(vsocket->socket_fd); + unlink(path); + } else if (vsocket->reconnect) { + vhost_user_remove_reconnect(vsocket); + } + pthread_mutex_destroy(&vsocket->conn_mutex); - free(vsocket->path); - free(vsocket); + vhost_user_socket_mem_free(vsocket); count = --vhost_user.vsocket_cnt; vhost_user.vsockets[i] = vhost_user.vsockets[count]; @@ -829,11 +1034,26 @@ rte_vhost_driver_start(const char *path) return -1; if (fdset_tid == 0) { - int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch, - &vhost_user.fdset); - if (ret != 0) + /** + * create a pipe which will be waited by poll and notified to + * rebuild the wait list of poll. + */ + if (fdset_pipe_init(&vhost_user.fdset) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to create pipe for vhost fdset\n"); + return -1; + } + + int ret = rte_ctrl_thread_create(&fdset_tid, + "vhost-events", NULL, fdset_event_dispatch, + &vhost_user.fdset); + if (ret != 0) { RTE_LOG(ERR, VHOST_CONFIG, "failed to create fdset handling thread"); + + fdset_pipe_uninit(&vhost_user.fdset); + return -1; + } } if (vsocket->is_server) diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c new file mode 100644 index 00000000..c82fd437 --- /dev/null +++ b/lib/librte_vhost/vdpa.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +/** + * @file + * + * Device specific vhost lib + */ + +#include <stdbool.h> + +#include <rte_malloc.h> +#include "rte_vdpa.h" +#include "vhost.h" + +static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE]; +static uint32_t vdpa_device_num; + +static bool +is_same_vdpa_device(struct rte_vdpa_dev_addr *a, + struct rte_vdpa_dev_addr *b) +{ + bool ret = true; + + if (a->type != b->type) + return false; + + switch (a->type) { + case PCI_ADDR: + if (a->pci_addr.domain != b->pci_addr.domain || + a->pci_addr.bus != b->pci_addr.bus || + a->pci_addr.devid != b->pci_addr.devid || + a->pci_addr.function != b->pci_addr.function) + ret = false; + break; + default: + break; + } + + return ret; +} + +int +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, + struct rte_vdpa_dev_ops *ops) +{ + struct rte_vdpa_device *dev; + char device_name[MAX_VDPA_NAME_LEN]; + int i; + + if (vdpa_device_num >= MAX_VHOST_DEVICE) + return -1; + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vdpa_devices[i] == NULL) + break; + } + + sprintf(device_name, "vdpa-dev-%d", i); + dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), + RTE_CACHE_LINE_SIZE); + if (!dev) + return -1; + + memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr)); + dev->ops = ops; + vdpa_devices[i] = dev; + vdpa_device_num++; + + return i; +} + +int +rte_vdpa_unregister_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL) + return -1; + + rte_free(vdpa_devices[did]); + vdpa_devices[did] = NULL; + vdpa_device_num--; + + return did; +} + +int +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr) +{ + 
struct rte_vdpa_device *dev; + int i; + + for (i = 0; i < MAX_VHOST_DEVICE; ++i) { + dev = vdpa_devices[i]; + if (dev && is_same_vdpa_device(&dev->addr, addr)) + return i; + } + + return -1; +} + +struct rte_vdpa_device * +rte_vdpa_get_device(int did) +{ + if (did < 0 || did >= MAX_VHOST_DEVICE) + return NULL; + + return vdpa_devices[did]; +} diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index a407067e..3c9be10a 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2017 Intel Corporation */ #include <linux/vhost.h> @@ -29,17 +29,17 @@ struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; /* Called with iotlb_lock read-locked */ uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm) + uint64_t iova, uint64_t *size, uint8_t perm) { uint64_t vva, tmp_size; - if (unlikely(!size)) + if (unlikely(!*size)) return 0; - tmp_size = size; + tmp_size = *size; vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm); - if (tmp_size == size) + if (tmp_size == *size) return vva; iova += tmp_size; @@ -68,19 +68,6 @@ __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, return 0; } -struct virtio_net * -get_device(int vid) -{ - struct virtio_net *dev = vhost_devices[vid]; - - if (unlikely(!dev)) { - RTE_LOG(ERR, VHOST_CONFIG, - "(%d) device not found.\n", vid); - } - - return dev; -} - void cleanup_vq(struct vhost_virtqueue *vq, int destroy) { @@ -106,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy) } void -free_vq(struct vhost_virtqueue *vq) +free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq) { - rte_free(vq->shadow_used_ring); + if (vq_is_packed(dev)) + rte_free(vq->shadow_used_packed); + else + rte_free(vq->shadow_used_split); rte_free(vq->batch_copy_elems); rte_mempool_free(vq->iotlb_pool); rte_free(vq); @@ -123,42 +113,95 @@ free_device(struct virtio_net *dev) uint32_t i; for (i = 0; i < dev->nr_vring; i++) - free_vq(dev->virtqueue[i]); + free_vq(dev, dev->virtqueue[i]); rte_free(dev); } -int -vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) +static int +vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { - uint64_t size; + uint64_t req_size, size; - if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) - goto out; - - size = sizeof(struct vring_desc) * vq->size; + req_size = sizeof(struct vring_desc) * vq->size; + size = req_size; vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->desc) + &size, VHOST_ACCESS_RW); + if (!vq->desc || size != req_size) return -1; - size = sizeof(struct vring_avail); - size += sizeof(uint16_t) * vq->size; + req_size = sizeof(struct vring_avail); + req_size += sizeof(uint16_t) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + req_size += sizeof(uint16_t); + size = req_size; vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->avail) + &size, VHOST_ACCESS_RW); + if (!vq->avail || size != req_size) return -1; - size = sizeof(struct vring_used); - size += sizeof(struct vring_used_elem) * vq->size; + req_size = sizeof(struct vring_used); + req_size += sizeof(struct vring_used_elem) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + req_size += 
sizeof(uint16_t); + size = req_size; vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr, - size, VHOST_ACCESS_RW); - if (!vq->used) + &size, VHOST_ACCESS_RW); + if (!vq->used || size != req_size) + return -1; + + return 0; +} + +static int +vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint64_t req_size, size; + + req_size = sizeof(struct vring_packed_desc) * vq->size; + size = req_size; + vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->desc_packed || size != req_size) return -1; + req_size = sizeof(struct vring_packed_desc_event); + size = req_size; + vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->driver_event || size != req_size) + return -1; + + req_size = sizeof(struct vring_packed_desc_event); + size = req_size; + vq->device_event = (struct vring_packed_desc_event *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr, + &size, VHOST_ACCESS_RW); + if (!vq->device_event || size != req_size) + return -1; + + return 0; +} + +int +vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + + if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + goto out; + + if (vq_is_packed(dev)) { + if (vring_translate_packed(dev, vq) < 0) + return -1; + } else { + if (vring_translate_split(dev, vq) < 0) + return -1; + } out: vq->access_ok = 1; @@ -240,6 +283,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) dev->virtqueue[vring_idx] = vq; init_vring_queue(dev, vring_idx); rte_spinlock_init(&vq->access_lock); + vq->avail_wrap_counter = 1; + vq->used_wrap_counter = 1; + vq->signalled_used_valid = false; dev->nr_vring += 1; @@ -274,21 +320,21 @@ vhost_new_device(void) struct virtio_net *dev; int i; - dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); - if (dev == NULL) { - RTE_LOG(ERR, VHOST_CONFIG, - "Failed to allocate memory for new dev.\n"); - return -1; - } - for (i = 0; i < MAX_VHOST_DEVICE; i++) { if (vhost_devices[i] == NULL) break; } + if (i == MAX_VHOST_DEVICE) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to find a free slot for new device.\n"); - rte_free(dev); + return -1; + } + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for new dev.\n"); return -1; } @@ -296,10 +342,28 @@ vhost_new_device(void) dev->vid = i; dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->slave_req_fd = -1; + dev->vdpa_dev_id = -1; + rte_spinlock_init(&dev->slave_req_lock); return i; } +void +vhost_destroy_device_notify(struct virtio_net *dev) +{ + struct rte_vdpa_device *vdpa_dev; + int did; + + if (dev->flags & VIRTIO_DEV_RUNNING) { + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->dev_close) + vdpa_dev->ops->dev_close(dev->vid); + dev->flags &= ~VIRTIO_DEV_RUNNING; + dev->notify_ops->destroy_device(dev->vid); + } +} + /* * Invoked when there is the vhost-user connection is broken (when * the virtio device is being detached). 
@@ -312,10 +376,7 @@ vhost_destroy_device(int vid) if (dev == NULL) return; - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(vid); - } + vhost_destroy_device_notify(dev); cleanup_device(dev, 1); free_device(dev); @@ -324,6 +385,33 @@ vhost_destroy_device(int vid) } void +vhost_attach_vdpa_device(int vid, int did) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + if (rte_vdpa_get_device(did) == NULL) + return; + + dev->vdpa_dev_id = did; +} + +void +vhost_detach_vdpa_device(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return; + + vhost_user_host_notifier_ctrl(vid, false); + + dev->vdpa_dev_id = -1; +} + +void vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) { struct virtio_net *dev; @@ -532,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) if (!vq) return -1; - vhost_vring_call(dev, vq); + if (vq_is_packed(dev)) + vhost_vring_call_packed(dev, vq); + else + vhost_vring_call_split(dev, vq); + return 0; } @@ -553,21 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx; } +static inline void +vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable) +{ + if (enable) + vq->used->flags &= ~VRING_USED_F_NO_NOTIFY; + else + vq->used->flags |= VRING_USED_F_NO_NOTIFY; +} + +static inline void +vhost_enable_notify_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, int enable) +{ + uint16_t flags; + + if (!enable) + vq->device_event->flags = VRING_EVENT_F_DISABLE; + + flags = VRING_EVENT_F_ENABLE; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { + flags = VRING_EVENT_F_DESC; + vq->device_event->off_wrap = vq->last_avail_idx | + vq->avail_wrap_counter << 15; + } + + rte_smp_wmb(); + + vq->device_event->flags = flags; +} + int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) { struct virtio_net *dev = get_device(vid); + struct vhost_virtqueue *vq; - if (dev == NULL) + if (!dev) return -1; - if (enable) { - RTE_LOG(ERR, VHOST_CONFIG, - "guest notification isn't supported.\n"); - return -1; - } + vq = dev->virtqueue[queue_id]; + + if (vq_is_packed(dev)) + vhost_enable_notify_packed(dev, vq, enable); + else + vhost_enable_notify_split(vq, enable); - dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; return 0; } @@ -627,3 +750,76 @@ rte_vhost_rx_queue_count(int vid, uint16_t qid) return *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx; } + +int rte_vhost_get_vdpa_device_id(int vid) +{ + struct virtio_net *dev = get_device(vid); + + if (dev == NULL) + return -1; + + return dev->vdpa_dev_id; +} + +int rte_vhost_get_log_base(int vid, uint64_t *log_base, + uint64_t *log_size) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *log_base = dev->log_base; + *log_size = dev->log_size; + + return 0; +} + +int rte_vhost_get_vring_base(int vid, uint16_t queue_id, + uint16_t *last_avail_idx, uint16_t *last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + *last_avail_idx = 
dev->virtqueue[queue_id]->last_avail_idx; + *last_used_idx = dev->virtqueue[queue_id]->last_used_idx; + + return 0; +} + +int rte_vhost_set_vring_base(int vid, uint16_t queue_id, + uint16_t last_avail_idx, uint16_t last_used_idx) +{ + struct virtio_net *dev = get_device(vid); + + if (!dev) + return -1; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return -1; + } + + dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx; + dev->virtqueue[queue_id]->last_used_idx = last_used_idx; + + return 0; +} diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index d947bc9e..760a09c0 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -1,11 +1,12 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_CDEV_H_ #define _VHOST_NET_CDEV_H_ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> #include <sys/types.h> #include <sys/queue.h> #include <unistd.h> @@ -19,6 +20,7 @@ #include <rte_rwlock.h> #include "rte_vhost.h" +#include "rte_vdpa.h" /* Used to indicate that the device is running on a data core */ #define VIRTIO_DEV_RUNNING 1 @@ -26,17 +28,22 @@ #define VIRTIO_DEV_READY 2 /* Used to indicate that the built-in vhost net device backend is enabled */ #define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4 +/* Used to indicate that the device has its own data path and configured */ +#define VIRTIO_DEV_VDPA_CONFIGURED 8 /* Backend value set by guest. */ #define VIRTIO_DEV_STOPPED -1 #define BUF_VECTOR_MAX 256 +#define VHOST_LOG_CACHE_NR 32 + /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. */ struct buf_vector { + uint64_t buf_iova; uint64_t buf_addr; uint32_t buf_len; uint32_t desc_idx; @@ -49,6 +56,7 @@ struct buf_vector { struct zcopy_mbuf { struct rte_mbuf *mbuf; uint32_t desc_idx; + uint16_t desc_count; uint16_t in_use; TAILQ_ENTRY(zcopy_mbuf) next; @@ -65,19 +73,43 @@ struct batch_copy_elem { uint64_t log_addr; }; +/* + * Structure that contains the info for batched dirty logging. + */ +struct log_cache_entry { + uint32_t offset; + unsigned long val; +}; + +struct vring_used_elem_packed { + uint16_t id; + uint32_t len; + uint32_t count; +}; + /** * Structure contains variables relevant to RX/TX virtqueues. */ struct vhost_virtqueue { - struct vring_desc *desc; - struct vring_avail *avail; - struct vring_used *used; + union { + struct vring_desc *desc; + struct vring_packed_desc *desc_packed; + }; + union { + struct vring_avail *avail; + struct vring_packed_desc_event *driver_event; + }; + union { + struct vring_used *used; + struct vring_packed_desc_event *device_event; + }; uint32_t size; uint16_t last_avail_idx; uint16_t last_used_idx; /* Last used index we notify to front end. 
*/ uint16_t signalled_used; + bool signalled_used_valid; #define VIRTIO_INVALID_EVENTFD (-1) #define VIRTIO_UNINITIALIZED_EVENTFD (-2) @@ -101,12 +133,20 @@ struct vhost_virtqueue { struct zcopy_mbuf *zmbufs; struct zcopy_mbuf_list zmbuf_list; - struct vring_used_elem *shadow_used_ring; + union { + struct vring_used_elem *shadow_used_split; + struct vring_used_elem_packed *shadow_used_packed; + }; uint16_t shadow_used_idx; struct vhost_vring_addr ring_addrs; struct batch_copy_elem *batch_copy_elems; uint16_t batch_copy_nb_elems; + bool used_wrap_counter; + bool avail_wrap_counter; + + struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR]; + uint16_t log_cache_nb_elem; rte_rwlock_t iotlb_lock; rte_rwlock_t iotlb_pending_lock; @@ -174,7 +214,41 @@ struct vhost_msg { #define VIRTIO_F_VERSION_1 32 #endif -#define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* Declare packed ring related bits for older kernels */ +#ifndef VIRTIO_F_RING_PACKED + +#define VIRTIO_F_RING_PACKED 34 + +#define VRING_DESC_F_NEXT 1 +#define VRING_DESC_F_WRITE 2 +#define VRING_DESC_F_INDIRECT 4 + +#define VRING_DESC_F_AVAIL (1ULL << 7) +#define VRING_DESC_F_USED (1ULL << 15) + +struct vring_packed_desc { + uint64_t addr; + uint32_t len; + uint16_t id; + uint16_t flags; +}; + +#define VRING_EVENT_F_ENABLE 0x0 +#define VRING_EVENT_F_DISABLE 0x1 +#define VRING_EVENT_F_DESC 0x2 + +struct vring_packed_desc_event { + uint16_t off_wrap; + uint16_t flags; +}; +#endif + +/* + * Available and used descs are in same order + */ +#ifndef VIRTIO_F_IN_ORDER +#define VIRTIO_F_IN_ORDER 35 +#endif /* Features supported by this builtin vhost-user net driver. */ #define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ @@ -199,7 +273,8 @@ struct vhost_msg { (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ - (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_F_IN_ORDER) | \ (1ULL << VIRTIO_F_IOMMU_PLATFORM)) @@ -210,6 +285,51 @@ struct guest_page { }; /** + * function prototype for the vhost backend to handler specific vhost user + * messages prior to the master message handling + * + * @param vid + * vhost device id + * @param msg + * Message pointer. + * @param require_reply + * If the handler requires sending a reply, this varaible shall be written 1, + * otherwise 0. + * @param skip_master + * If the handler requires skipping the master message handling, this variable + * shall be written 1, otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_pre_handle)(int vid, void *msg, + uint32_t *require_reply, uint32_t *skip_master); + +/** + * function prototype for the vhost backend to handler specific vhost user + * messages after the master message handling is done + * + * @param vid + * vhost device id + * @param msg + * Message pointer. + * @param require_reply + * If the handler requires sending a reply, this varaible shall be written 1, + * otherwise 0. + * @return + * 0 on success, -1 on failure + */ +typedef int (*vhost_msg_post_handle)(int vid, void *msg, + uint32_t *require_reply); + +/** + * pre and post vhost user message handlers + */ +struct vhost_user_extern_ops { + vhost_msg_pre_handle pre_msg_handle; + vhost_msg_post_handle post_msg_handle; +}; + +/** * Device structure contains all configuration information relating * to the device. 
*/ @@ -241,8 +361,32 @@ struct virtio_net { struct guest_page *guest_pages; int slave_req_fd; + rte_spinlock_t slave_req_lock; + + /* + * Device id to identify a specific backend device. + * It's set to -1 for the default software implementation. + */ + int vdpa_dev_id; + + /* private data for virtio device */ + void *extern_data; + /* pre and post vhost user message handlers for the device */ + struct vhost_user_extern_ops extern_ops; } __rte_cache_aligned; +static __rte_always_inline bool +vq_is_packed(struct virtio_net *dev) +{ + return dev->features & (1ull << VIRTIO_F_RING_PACKED); +} + +static inline bool +desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter) +{ + return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) && + wrap_counter != !!(desc->flags & VRING_DESC_F_USED); +} #define VHOST_LOG_PAGE 4096 @@ -252,7 +396,15 @@ struct virtio_net { static __rte_always_inline void vhost_set_bit(unsigned int nr, volatile uint8_t *addr) { - __sync_fetch_and_or_8(addr, (1U << nr)); +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * __sync_ built-ins are deprecated, but __atomic_ ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or_1(addr, (1U << nr)); +#else + __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED); +#endif } static __rte_always_inline void @@ -284,6 +436,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) } static __rte_always_inline void +vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + unsigned long *log_base; + int i; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base)) + return; + + log_base = (unsigned long *)(uintptr_t)dev->log_base; + + /* + * It is expected a write memory barrier has been issued + * before this function is called. + */ + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100) + /* + * '__sync' builtins are deprecated, but '__atomic' ones + * are sub-optimized in older GCC versions. + */ + __sync_fetch_and_or(log_base + elem->offset, elem->val); +#else + __atomic_fetch_or(log_base + elem->offset, elem->val, + __ATOMIC_RELAXED); +#endif + } + + rte_smp_wmb(); + + vq->log_cache_nb_elem = 0; +} + +static __rte_always_inline void +vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t page) +{ + uint32_t bit_nr = page % (sizeof(unsigned long) << 3); + uint32_t offset = page / (sizeof(unsigned long) << 3); + int i; + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + + if (elem->offset == offset) { + elem->val |= (1UL << bit_nr); + return; + } + } + + if (unlikely(i >= VHOST_LOG_CACHE_NR)) { + /* + * No more room for a new log cache entry, + * so write the dirty log map directly. 
+ */ + rte_smp_wmb(); + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + + return; + } + + vq->log_cache[i].offset = offset; + vq->log_cache[i].val = (1UL << bit_nr); + vq->log_cache_nb_elem++; +} + +static __rte_always_inline void +vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_cache_page(dev, vq, page); + page += 1; + } +} + +static __rte_always_inline void +vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); +} + +static __rte_always_inline void vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t offset, uint64_t len) { @@ -296,8 +545,8 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, #ifdef RTE_LIBRTE_VHOST_DEBUG #define VHOST_MAX_PRINT_BUFF 6072 -#define LOG_LEVEL RTE_LOG_DEBUG -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) \ + RTE_LOG(DEBUG, log_type, fmt, ##args) #define PRINT_PACKET(device, addr, size, header) do { \ char *pkt_addr = (char *)(addr); \ unsigned int index; \ @@ -313,11 +562,10 @@ vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, } \ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ + VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ } while (0) #else -#define LOG_LEVEL RTE_LOG_INFO -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) 
do {} while (0) #define PRINT_PACKET(device, addr, size, header) do {} while (0) #endif @@ -345,18 +593,33 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size) return 0; } -struct virtio_net *get_device(int vid); +static __rte_always_inline struct virtio_net * +get_device(int vid) +{ + struct virtio_net *dev = vhost_devices[vid]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) device not found.\n", vid); + } + + return dev; +} int vhost_new_device(void); void cleanup_device(struct virtio_net *dev, int destroy); void reset_device(struct virtio_net *dev); void vhost_destroy_device(int); +void vhost_destroy_device_notify(struct virtio_net *dev); void cleanup_vq(struct vhost_virtqueue *vq, int destroy); -void free_vq(struct vhost_virtqueue *vq); +void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq); int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); +void vhost_attach_vdpa_device(int vid, int did); +void vhost_detach_vdpa_device(int vid); + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); void vhost_enable_dequeue_zero_copy(int vid); void vhost_set_builtin_virtio_net(int vid, bool enable); @@ -371,18 +634,18 @@ struct vhost_device_ops const *vhost_driver_callback_get(const char *path); void vhost_backend_cleanup(struct virtio_net *dev); uint64_t __vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm); + uint64_t iova, uint64_t *len, uint8_t perm); int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq); void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq); static __rte_always_inline uint64_t vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t iova, uint64_t size, uint8_t perm) + uint64_t iova, uint64_t *len, uint8_t perm) { if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) - return rte_vhost_gpa_to_vva(dev->mem, iova); + return rte_vhost_va_from_guest_pa(dev->mem, iova, len); - return __vhost_iova_to_vva(dev, vq, iova, size, perm); + return __vhost_iova_to_vva(dev, vq, iova, len, perm); } #define vhost_used_event(vr) \ @@ -401,17 +664,17 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) } static __rte_always_inline void -vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq) +vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { /* Flush used->idx update before we read avail->flags. */ - rte_mb(); + rte_smp_mb(); /* Don't kick guest if we don't reach index specified by guest. */ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { uint16_t old = vq->signalled_used; uint16_t new = vq->last_used_idx; - LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "%s: used_event_idx=%d, old=%d, new=%d\n", __func__, vhost_used_event(vq), old, new); @@ -428,4 +691,55 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq) } } +static __rte_always_inline void +vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + uint16_t old, new, off, off_wrap; + bool signalled_used_valid, kick = false; + + /* Flush used desc update. 
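
vhost_vring_call_split() above relies on vhost_need_event(); a standalone sketch of the standard virtio event-index rule it implements, with modulo-2^16 arithmetic and sample indices invented for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
{
        /* All arithmetic is modulo 2^16, which handles index wrap-around:
         * notify only if event_idx lies in the window (old_idx, new_idx]. */
        return (uint16_t)(new_idx - event_idx - 1) <
               (uint16_t)(new_idx - old_idx);
}

int main(void)
{
        /* The driver asked to be notified once entry 10 is used. */
        printf("%d\n", need_event(10, 12, 8));  /* 1: 10 is inside (8, 12] */
        printf("%d\n", need_event(20, 12, 8));  /* 0: not reached yet */
        return 0;
}
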
*/ + rte_smp_mb(); + + if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) { + if (vq->driver_event->flags != + VRING_EVENT_F_DISABLE) + kick = true; + goto kick; + } + + old = vq->signalled_used; + new = vq->last_used_idx; + vq->signalled_used = new; + signalled_used_valid = vq->signalled_used_valid; + vq->signalled_used_valid = true; + + if (vq->driver_event->flags != VRING_EVENT_F_DESC) { + if (vq->driver_event->flags != VRING_EVENT_F_DISABLE) + kick = true; + goto kick; + } + + if (unlikely(!signalled_used_valid)) { + kick = true; + goto kick; + } + + rte_smp_rmb(); + + off_wrap = vq->driver_event->off_wrap; + off = off_wrap & ~(1 << 15); + + if (new <= old) + old -= vq->size; + + if (vq->used_wrap_counter != off_wrap >> 15) + off -= vq->size; + + if (vhost_need_event(off, new, old)) + kick = true; +kick: + if (kick) + eventfd_write(vq->callfd, (eventfd_t)1); +} + #endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c new file mode 100644 index 00000000..57341ef8 --- /dev/null +++ b/lib/librte_vhost/vhost_crypto.c @@ -0,0 +1,1372 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Intel Corporation + */ +#include <rte_malloc.h> +#include <rte_hash.h> +#include <rte_jhash.h> +#include <rte_mbuf.h> +#include <rte_cryptodev.h> + +#include "rte_vhost_crypto.h" +#include "vhost.h" +#include "vhost_user.h" +#include "virtio_crypto.h" + +#define INHDR_LEN (sizeof(struct virtio_crypto_inhdr)) +#define IV_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) + +#define VC_LOG_DBG(fmt, args...) \ + RTE_LOG(DEBUG, USER1, "[%s] %s() line %u: " fmt "\n", \ + "Vhost-Crypto", __func__, __LINE__, ## args) +#else +#define VC_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_INFO(fmt, args...) \ + RTE_LOG(INFO, USER1, "[VHOST-Crypto]: " fmt "\n", ## args) +#define VC_LOG_DBG(fmt, args...) 
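
A standalone sketch of how vhost_vring_call_packed() above decodes driver_event->off_wrap: bit 15 carries the driver's wrap counter and the low 15 bits the descriptor offset (the values here are illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t off_wrap = (1 << 15) | 300;    /* wrap bit set, offset 300 */
        uint16_t off  = off_wrap & ~(1 << 15);  /* low 15 bits: offset */
        uint16_t wrap = off_wrap >> 15;         /* bit 15: wrap counter */

        /* The caller then compares 'wrap' with its own used_wrap_counter
         * and, when they differ, treats the offset as one ring lap behind. */
        printf("offset=%u wrap=%u\n", off, wrap);   /* offset=300 wrap=1 */
        return 0;
}
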
+#endif + +#define VIRTIO_CRYPTO_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1 << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1 << VIRTIO_RING_F_EVENT_IDX) | \ + (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \ + (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \ + (1 << VIRTIO_NET_F_CTRL_VQ)) + +#define IOVA_TO_VVA(t, r, a, l, p) \ + ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p)) + +static int +cipher_algo_transform(uint32_t virtio_cipher_algo) +{ + int ret; + + switch (virtio_cipher_algo) { + case VIRTIO_CRYPTO_CIPHER_AES_CBC: + ret = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CTR: + ret = RTE_CRYPTO_CIPHER_AES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_DES_ECB: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_CIPHER_DES_CBC: + ret = RTE_CRYPTO_CIPHER_DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_ECB: + ret = RTE_CRYPTO_CIPHER_3DES_ECB; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CBC: + ret = RTE_CRYPTO_CIPHER_3DES_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_3DES_CTR: + ret = RTE_CRYPTO_CIPHER_3DES_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_KASUMI_F8: + ret = RTE_CRYPTO_CIPHER_KASUMI_F8; + break; + case VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2: + ret = RTE_CRYPTO_CIPHER_SNOW3G_UEA2; + break; + case VIRTIO_CRYPTO_CIPHER_AES_F8: + ret = RTE_CRYPTO_CIPHER_AES_F8; + break; + case VIRTIO_CRYPTO_CIPHER_AES_XTS: + ret = RTE_CRYPTO_CIPHER_AES_XTS; + break; + case VIRTIO_CRYPTO_CIPHER_ZUC_EEA3: + ret = RTE_CRYPTO_CIPHER_ZUC_EEA3; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int +auth_algo_transform(uint32_t virtio_auth_algo) +{ + int ret; + + switch (virtio_auth_algo) { + + case VIRTIO_CRYPTO_NO_MAC: + ret = RTE_CRYPTO_AUTH_NULL; + break; + case VIRTIO_CRYPTO_MAC_HMAC_MD5: + ret = RTE_CRYPTO_AUTH_MD5_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA1: + ret = RTE_CRYPTO_AUTH_SHA1_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_224: + ret = RTE_CRYPTO_AUTH_SHA224_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_256: + ret = RTE_CRYPTO_AUTH_SHA256_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_384: + ret = RTE_CRYPTO_AUTH_SHA384_HMAC; + break; + case VIRTIO_CRYPTO_MAC_HMAC_SHA_512: + ret = RTE_CRYPTO_AUTH_SHA512_HMAC; + break; + case VIRTIO_CRYPTO_MAC_CMAC_3DES: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CMAC; + break; + case VIRTIO_CRYPTO_MAC_KASUMI_F9: + ret = RTE_CRYPTO_AUTH_KASUMI_F9; + break; + case VIRTIO_CRYPTO_MAC_SNOW3G_UIA2: + ret = RTE_CRYPTO_AUTH_SNOW3G_UIA2; + break; + case VIRTIO_CRYPTO_MAC_GMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_GMAC; + break; + case VIRTIO_CRYPTO_MAC_GMAC_TWOFISH: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_AES: + ret = RTE_CRYPTO_AUTH_AES_CBC_MAC; + break; + case VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9: + ret = -VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_MAC_XCBC_AES: + ret = RTE_CRYPTO_AUTH_AES_XCBC_MAC; + break; + default: + ret = -VIRTIO_CRYPTO_BADMSG; + break; + } + + return ret; +} + +static int get_iv_len(enum rte_crypto_cipher_algorithm algo) +{ + int len; + + switch (algo) { + case RTE_CRYPTO_CIPHER_3DES_CBC: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_CTR: + len = 8; + break; + case RTE_CRYPTO_CIPHER_3DES_ECB: + len = 8; + break; + case RTE_CRYPTO_CIPHER_AES_CBC: + len = 16; + break; + + /* TODO: add common algos */ + + default: + len = -1; + break; + } + + return len; +} + +/** + * vhost_crypto struct is used to maintain a number of virtio_cryptos and + * one DPDK crypto device that deals with 
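
The transform helpers above follow one convention: a non-negative return is a DPDK enum value, a negative return carries a virtio-crypto status code. A toy sketch of that convention with invented constants (not the real enums):

#include <stdio.h>

#define TOY_VIRTIO_AES_CBC 1
#define TOY_VIRTIO_DES_ECB 2
#define TOY_DPDK_AES_CBC   7
#define TOY_STATUS_NOTSUPP 3

static int toy_transform(int virtio_algo)
{
        switch (virtio_algo) {
        case TOY_VIRTIO_AES_CBC:
                return TOY_DPDK_AES_CBC;      /* supported: backend enum */
        case TOY_VIRTIO_DES_ECB:
                return -TOY_STATUS_NOTSUPP;   /* recognised but unsupported */
        default:
                return -1;                    /* malformed request */
        }
}

int main(void)
{
        int ret = toy_transform(TOY_VIRTIO_DES_ECB);

        if (ret < 0)
                printf("reply with virtio status %d\n", -ret);
        else
                printf("use backend algorithm %d\n", ret);
        return 0;
}
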
all crypto workloads. It is declared + * here and defined in vhost_crypto.c + */ +struct vhost_crypto { + /** Used to lookup DPDK Cryptodev Session based on VIRTIO crypto + * session ID. + */ + struct rte_hash *session_map; + struct rte_mempool *mbuf_pool; + struct rte_mempool *sess_pool; + + /** DPDK cryptodev ID */ + uint8_t cid; + uint16_t nb_qps; + + uint64_t last_session_id; + + uint64_t cache_session_id; + struct rte_cryptodev_sym_session *cache_session; + /** socket id for the device */ + int socket_id; + + struct virtio_net *dev; + + uint8_t option; +} __rte_cache_aligned; + +struct vhost_crypto_data_req { + struct vring_desc *head; + struct virtio_net *dev; + struct virtio_crypto_inhdr *inhdr; + struct vhost_virtqueue *vq; + struct vring_desc *wb_desc; + uint16_t wb_len; + uint16_t desc_idx; + uint16_t len; + uint16_t zero_copy; +}; + +static int +transform_cipher_param(struct rte_crypto_sym_xform *xform, + VhostUserCryptoSessionParam *param) +{ + int ret; + + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform->cipher.algo = (uint32_t)ret; + xform->cipher.key.length = param->cipher_key_len; + if (xform->cipher.key.length > 0) + xform->cipher.key.data = param->cipher_key_buf; + if (param->dir == VIRTIO_CRYPTO_OP_ENCRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + else if (param->dir == VIRTIO_CRYPTO_OP_DECRYPT) + xform->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + else { + VC_LOG_DBG("Bad operation type"); + return -VIRTIO_CRYPTO_BADMSG; + } + + ret = get_iv_len(xform->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform->cipher.iv.length = (uint16_t)ret; + xform->cipher.iv.offset = IV_OFFSET; + return 0; +} + +static int +transform_chain_param(struct rte_crypto_sym_xform *xforms, + VhostUserCryptoSessionParam *param) +{ + struct rte_crypto_sym_xform *xform_cipher, *xform_auth; + int ret; + + switch (param->chaining_dir) { + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER: + xform_auth = xforms; + xform_cipher = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + break; + case VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH: + xform_cipher = xforms; + xform_auth = xforms->next; + xform_cipher->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + xform_auth->auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + break; + default: + return -VIRTIO_CRYPTO_BADMSG; + } + + /* cipher */ + ret = cipher_algo_transform(param->cipher_algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform_cipher->cipher.algo = (uint32_t)ret; + xform_cipher->cipher.key.length = param->cipher_key_len; + xform_cipher->cipher.key.data = param->cipher_key_buf; + ret = get_iv_len(xform_cipher->cipher.algo); + if (unlikely(ret < 0)) + return ret; + xform_cipher->cipher.iv.length = (uint16_t)ret; + xform_cipher->cipher.iv.offset = IV_OFFSET; + + /* auth */ + xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH; + ret = auth_algo_transform(param->hash_algo); + if (unlikely(ret < 0)) + return ret; + xform_auth->auth.algo = (uint32_t)ret; + xform_auth->auth.digest_length = param->digest_len; + xform_auth->auth.key.length = param->auth_key_len; + xform_auth->auth.key.data = param->auth_key_buf; + + return 0; +} + +static void +vhost_crypto_create_sess(struct vhost_crypto *vcrypto, + VhostUserCryptoSessionParam *sess_param) +{ + struct rte_crypto_sym_xform xform1 = {0}, xform2 = {0}; + struct 
rte_cryptodev_sym_session *session; + int ret; + + switch (sess_param->op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + ret = transform_cipher_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform session msg (%i)", ret); + sess_param->session_id = ret; + return; + } + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + if (unlikely(sess_param->hash_mode != + VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH)) { + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + VC_LOG_ERR("Error transform session message (%i)", + -VIRTIO_CRYPTO_NOTSUPP); + return; + } + + xform1.next = &xform2; + + ret = transform_chain_param(&xform1, sess_param); + if (unlikely(ret)) { + VC_LOG_ERR("Error transform session message (%i)", ret); + sess_param->session_id = ret; + return; + } + + break; + default: + VC_LOG_ERR("Algorithm not yet supported"); + sess_param->session_id = -VIRTIO_CRYPTO_NOTSUPP; + return; + } + + session = rte_cryptodev_sym_session_create(vcrypto->sess_pool); + if (!session) { + VC_LOG_ERR("Failed to create session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + if (rte_cryptodev_sym_session_init(vcrypto->cid, session, &xform1, + vcrypto->sess_pool) < 0) { + VC_LOG_ERR("Failed to initialize session"); + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + /* insert hash to map */ + if (rte_hash_add_key_data(vcrypto->session_map, + &vcrypto->last_session_id, session) < 0) { + VC_LOG_ERR("Failed to insert session to hash table"); + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) + VC_LOG_ERR("Failed to clear session"); + else { + if (rte_cryptodev_sym_session_free(session) < 0) + VC_LOG_ERR("Failed to free session"); + } + sess_param->session_id = -VIRTIO_CRYPTO_ERR; + return; + } + + VC_LOG_INFO("Session %"PRIu64" created for vdev %i.", + vcrypto->last_session_id, vcrypto->dev->vid); + + sess_param->session_id = vcrypto->last_session_id; + vcrypto->last_session_id++; +} + +static int +vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id) +{ + struct rte_cryptodev_sym_session *session; + uint64_t sess_id = session_id; + int ret; + + ret = rte_hash_lookup_data(vcrypto->session_map, &sess_id, + (void **)&session); + + if (unlikely(ret < 0)) { + VC_LOG_ERR("Failed to delete session %"PRIu64".", session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + + if (rte_cryptodev_sym_session_clear(vcrypto->cid, session) < 0) { + VC_LOG_DBG("Failed to clear session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_cryptodev_sym_session_free(session) < 0) { + VC_LOG_DBG("Failed to free session"); + return -VIRTIO_CRYPTO_ERR; + } + + if (rte_hash_del_key(vcrypto->session_map, &sess_id) < 0) { + VC_LOG_DBG("Failed to delete session from hash table."); + return -VIRTIO_CRYPTO_ERR; + } + + VC_LOG_INFO("Session %"PRIu64" deleted for vdev %i.", sess_id, + vcrypto->dev->vid); + + return 0; +} + +static int +vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + VhostUserMsg *vmsg = msg; + int ret = 0; + + if (dev == NULL || require_reply == NULL) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + vcrypto = dev->extern_data; + if (vcrypto == NULL) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + *require_reply = 0; + + if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) { + vhost_crypto_create_sess(vcrypto, + &vmsg->payload.crypto_session); + 
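
A standalone sketch of the session lifecycle handled by the create and close paths above, using a toy fixed-size table in place of rte_hash; all identifiers here are invented:

#include <stdint.h>
#include <stdio.h>

#define MAX_SESS 8

struct toy_map { uint64_t id[MAX_SESS]; void *handle[MAX_SESS]; };

static uint64_t next_id = 1;

static uint64_t sess_create(struct toy_map *m, void *handle)
{
        for (int i = 0; i < MAX_SESS; i++)
                if (!m->handle[i]) {
                        m->id[i] = next_id++;   /* monotonically increasing */
                        m->handle[i] = handle;
                        return m->id[i];
                }
        return 0;   /* table full: the caller reports an error status */
}

static int sess_close(struct toy_map *m, uint64_t id)
{
        for (int i = 0; i < MAX_SESS; i++)
                if (m->handle[i] && m->id[i] == id) {
                        m->handle[i] = NULL;
                        return 0;
                }
        return -1;  /* unknown session, maps to an "invalid session" reply */
}

int main(void)
{
        struct toy_map m = {0};
        int dummy;
        uint64_t id = sess_create(&m, &dummy);

        printf("created %llu, close -> %d, close again -> %d\n",
               (unsigned long long)id, sess_close(&m, id), sess_close(&m, id));
        return 0;
}
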
*require_reply = 1; + } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) + ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64); + else + ret = -EINVAL; + + return ret; +} + +static __rte_always_inline struct vring_desc * +find_write_desc(struct vring_desc *head, struct vring_desc *desc) +{ + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + + while (desc->flags & VRING_DESC_F_NEXT) { + desc = &head[desc->next]; + if (desc->flags & VRING_DESC_F_WRITE) + return desc; + } + + return NULL; +} + +static struct virtio_crypto_inhdr * +reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc) +{ + uint64_t dlen; + struct virtio_crypto_inhdr *inhdr; + + while (desc->flags & VRING_DESC_F_NEXT) + desc = &vc_req->head[desc->next]; + + dlen = desc->len; + inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr, + &dlen, VHOST_ACCESS_WO); + if (unlikely(!inhdr || dlen != desc->len)) + return NULL; + + return inhdr; +} + +static __rte_always_inline int +move_desc(struct vring_desc *head, struct vring_desc **cur_desc, + uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + int left = size; + + rte_prefetch0(&head[desc->next]); + left -= desc->len; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + left -= desc->len; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &head[desc->next]; + return 0; +} + +static int +copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, + struct vring_desc **cur_desc, uint32_t size) +{ + struct vring_desc *desc = *cur_desc; + uint64_t remain, addr, dlen, len; + uint32_t to_copy; + uint8_t *data = dst_data; + uint8_t *src; + int left = size; + + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = to_copy; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy((uint8_t *)data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &vc_req->head[desc->next]; + rte_prefetch0(&vc_req->head[desc->next]); + to_copy = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RO); + if (unlikely(!src || !dlen)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, dlen); + data += dlen; + + if (unlikely(dlen < to_copy)) { + remain = to_copy - dlen; + addr = desc->addr + dlen; + + while (remain) { + len = remain; + src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(data, src, len); + addr += len; + remain -= len; + data += len; + } + } + + left -= to_copy; + } + + if (unlikely(left > 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + *cur_desc = &vc_req->head[desc->next]; + + return 0; +} + +static __rte_always_inline void * 
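
A standalone sketch of the chain walk that move_desc() and copy_data() above perform: follow the NEXT flag while deducting consumed length, and treat a chain shorter than the request as malformed (local toy types, not the real vring structures):

#include <stdint.h>
#include <stdio.h>

#define F_NEXT  0x1
#define F_WRITE 0x2

struct toy_desc { uint32_t len; uint16_t flags; uint16_t next; };

static int walk(const struct toy_desc *table, uint16_t idx, uint32_t size)
{
        int64_t left = size;

        left -= table[idx].len;
        while ((table[idx].flags & F_NEXT) && left > 0) {
                idx = table[idx].next;
                left -= table[idx].len;
        }
        return left > 0 ? -1 : 0;   /* -1: chain too short for the request */
}

int main(void)
{
        struct toy_desc table[3] = {
                { .len = 64,  .flags = F_NEXT, .next = 1 },
                { .len = 128, .flags = F_NEXT, .next = 2 },
                { .len = 32,  .flags = F_WRITE },
        };

        printf("%d %d\n", walk(table, 0, 150), walk(table, 0, 1024)); /* 0 -1 */
        return 0;
}
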
+get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc, + uint32_t size, uint8_t perm) +{ + void *data; + uint64_t dlen = (*cur_desc)->len; + + data = IOVA_TO_VVA(void *, vc_req, (*cur_desc)->addr, &dlen, perm); + if (unlikely(!data || dlen != (*cur_desc)->len)) { + VC_LOG_ERR("Failed to map object"); + return NULL; + } + + if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0)) + return NULL; + + return data; +} + +static int +write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req) +{ + struct rte_mbuf *mbuf = op->sym->m_dst; + struct vring_desc *head = vc_req->head; + struct vring_desc *desc = vc_req->wb_desc; + int left = vc_req->wb_len; + uint32_t to_write; + uint8_t *src_data = mbuf->buf_addr, *dst; + uint64_t dlen; + + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; + src_data += to_write; + + while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + desc = &head[desc->next]; + rte_prefetch0(&head[desc->next]); + to_write = RTE_MIN(desc->len, (uint32_t)left); + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW); + if (unlikely(!dst || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + return -1; + } + + rte_memcpy(dst, src_data, to_write); + left -= to_write; + src_data += to_write; + } + + if (unlikely(left < 0)) { + VC_LOG_ERR("Incorrect virtio descriptor"); + return -1; + } + + return 0; +} + +static uint8_t +prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_cipher_data_req *cipher, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint8_t ret = 0; + + /* prepare */ + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, &desc, + cipher->para.iv_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + m_src->data_len = cipher->para.src_data_len; + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + cipher->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.src_data_len, VHOST_ACCESS_RO); + if (unlikely(m_src->buf_iova == 0 || + m_src->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(cipher->para.src_data_len > + RTE_MBUF_DEFAULT_BUF_SIZE)) { + VC_LOG_ERR("Not enough space to do data copy"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, cipher->para.src_data_len) + < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* dst */ + desc = find_write_desc(vc_req->head, desc); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->buf_iova = 
gpa_to_hpa(vcrypto->dev, + desc->addr, cipher->para.dst_data_len); + m_dst->buf_addr = get_data_ptr(vc_req, &desc, + cipher->para.dst_data_len, VHOST_ACCESS_RW); + if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + + m_dst->data_len = cipher->para.dst_data_len; + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_desc = desc; + vc_req->wb_len = cipher->para.dst_data_len; + if (unlikely(move_desc(vc_req->head, &desc, + vc_req->wb_len) < 0)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* src data */ + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + op->sym->cipher.data.offset = 0; + op->sym->cipher.data.length = cipher->para.src_data_len; + + vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + if (unlikely(vc_req->inhdr == NULL)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + vc_req->inhdr->status = VIRTIO_CRYPTO_OK; + vc_req->len = cipher->para.dst_data_len + INHDR_LEN; + + return 0; + +error_exit: + vc_req->len = INHDR_LEN; + return ret; +} + +static uint8_t +prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + struct vhost_crypto_data_req *vc_req, + struct virtio_crypto_alg_chain_data_req *chain, + struct vring_desc *cur_desc) +{ + struct vring_desc *desc = cur_desc; + struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; + uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); + uint32_t digest_offset; + void *digest_addr; + uint8_t ret = 0; + + /* prepare */ + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, &desc, + chain->para.iv_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + m_src->data_len = chain->para.src_data_len; + m_dst->data_len = chain->para.dst_data_len; + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, + chain->para.src_data_len); + m_src->buf_addr = get_data_ptr(vc_req, &desc, + chain->para.src_data_len, VHOST_ACCESS_RO); + if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(chain->para.src_data_len > + RTE_MBUF_DEFAULT_BUF_SIZE)) { + VC_LOG_ERR("Not enough space to do data copy"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, &desc, chain->para.src_data_len)) < 0) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* dst */ + desc = find_write_desc(vc_req->head, desc); + if (unlikely(!desc)) { + VC_LOG_ERR("Cannot find write location"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->buf_iova = gpa_to_hpa(vcrypto->dev, + desc->addr, chain->para.dst_data_len); + m_dst->buf_addr = get_data_ptr(vc_req, &desc, + chain->para.dst_data_len, VHOST_ACCESS_RW); + if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + + op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev, + 
desc->addr, chain->para.hash_result_len); + op->sym->auth.digest.data = get_data_ptr(vc_req, &desc, + chain->para.hash_result_len, VHOST_ACCESS_RW); + if (unlikely(op->sym->auth.digest.phys_addr == 0)) { + VC_LOG_ERR("zero_copy may fail due to cross page data"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + digest_offset = m_dst->data_len; + digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *, + digest_offset); + + vc_req->wb_desc = desc; + vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len; + + if (unlikely(move_desc(vc_req->head, &desc, + chain->para.dst_data_len) < 0)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + if (unlikely(copy_data(digest_addr, vc_req, &desc, + chain->para.hash_result_len)) < 0) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + op->sym->auth.digest.data = digest_addr; + op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst, + digest_offset); + break; + default: + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + /* record inhdr */ + vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + if (unlikely(vc_req->inhdr == NULL)) { + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } + + vc_req->inhdr->status = VIRTIO_CRYPTO_OK; + + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + op->sym->cipher.data.offset = chain->para.cipher_start_src_offset; + op->sym->cipher.data.length = chain->para.src_data_len - + chain->para.cipher_start_src_offset; + + op->sym->auth.data.offset = chain->para.hash_start_src_offset; + op->sym->auth.data.length = chain->para.len_to_hash; + + vc_req->len = chain->para.dst_data_len + chain->para.hash_result_len + + INHDR_LEN; + return 0; + +error_exit: + vc_req->len = INHDR_LEN; + return ret; +} + +/** + * Process on descriptor + */ +static __rte_always_inline int +vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, + struct vhost_virtqueue *vq, struct rte_crypto_op *op, + struct vring_desc *head, uint16_t desc_idx) +{ + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src); + struct rte_cryptodev_sym_session *session; + struct virtio_crypto_op_data_req *req, tmp_req; + struct virtio_crypto_inhdr *inhdr; + struct vring_desc *desc = NULL; + uint64_t session_id; + uint64_t dlen; + int err = 0; + + vc_req->desc_idx = desc_idx; + vc_req->dev = vcrypto->dev; + vc_req->vq = vq; + + if (likely(head->flags & VRING_DESC_F_INDIRECT)) { + dlen = head->len; + desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr, + &dlen, VHOST_ACCESS_RO); + if (unlikely(!desc || dlen != head->len)) + return -1; + desc_idx = 0; + head = desc; + } else { + desc = head; + } + + vc_req->head = head; + vc_req->zero_copy = vcrypto->option; + + req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO); + if (unlikely(req == NULL)) { + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + err = VIRTIO_CRYPTO_BADMSG; + VC_LOG_ERR("Invalid descriptor"); + goto error_exit; + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + req = &tmp_req; + if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req)) + < 0)) { + err = VIRTIO_CRYPTO_BADMSG; + VC_LOG_ERR("Invalid descriptor"); + goto error_exit; + } + break; + default: + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Invalid option"); + goto error_exit; + } + } + + switch (req->header.opcode) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + case VIRTIO_CRYPTO_CIPHER_DECRYPT: + session_id = req->header.session_id; + + /* one branch to avoid 
unnecessary table lookup */ + if (vcrypto->cache_session_id != session_id) { + err = rte_hash_lookup_data(vcrypto->session_map, + &session_id, (void **)&session); + if (unlikely(err < 0)) { + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Failed to find session %"PRIu64, + session_id); + goto error_exit; + } + + vcrypto->cache_session = session; + vcrypto->cache_session_id = session_id; + } + + session = vcrypto->cache_session; + + err = rte_crypto_op_attach_sym_session(op, session); + if (unlikely(err < 0)) { + err = VIRTIO_CRYPTO_ERR; + VC_LOG_ERR("Failed to attach session to op"); + goto error_exit; + } + + switch (req->u.sym_req.op_type) { + case VIRTIO_CRYPTO_SYM_OP_NONE: + err = VIRTIO_CRYPTO_NOTSUPP; + break; + case VIRTIO_CRYPTO_SYM_OP_CIPHER: + err = prepare_sym_cipher_op(vcrypto, op, vc_req, + &req->u.sym_req.u.cipher, desc); + break; + case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: + err = prepare_sym_chain_op(vcrypto, op, vc_req, + &req->u.sym_req.u.chain, desc); + break; + } + if (unlikely(err != 0)) { + VC_LOG_ERR("Failed to process sym request"); + goto error_exit; + } + break; + default: + VC_LOG_ERR("Unsupported symmetric crypto request type %u", + req->header.opcode); + goto error_exit; + } + + return 0; + +error_exit: + + inhdr = reach_inhdr(vc_req, desc); + if (likely(inhdr != NULL)) + inhdr->status = (uint8_t)err; + + return -1; +} + +static __rte_always_inline struct vhost_virtqueue * +vhost_crypto_finalize_one_request(struct rte_crypto_op *op, + struct vhost_virtqueue *old_vq) +{ + struct rte_mbuf *m_src = op->sym->m_src; + struct rte_mbuf *m_dst = op->sym->m_dst; + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src); + uint16_t desc_idx; + int ret = 0; + + if (unlikely(!vc_req)) { + VC_LOG_ERR("Failed to retrieve vc_req"); + return NULL; + } + + if (old_vq && (vc_req->vq != old_vq)) + return vc_req->vq; + + desc_idx = vc_req->desc_idx; + + if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; + else { + if (vc_req->zero_copy == 0) { + ret = write_back_data(op, vc_req); + if (unlikely(ret != 0)) + vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; + } + } + + vc_req->vq->used->ring[desc_idx].id = desc_idx; + vc_req->vq->used->ring[desc_idx].len = vc_req->len; + + rte_mempool_put(m_dst->pool, (void *)m_dst); + rte_mempool_put(m_src->pool, (void *)m_src); + + return vc_req->vq; +} + +static __rte_always_inline uint16_t +vhost_crypto_complete_one_vm_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfd) +{ + uint16_t processed = 1; + struct vhost_virtqueue *vq, *tmp_vq; + + if (unlikely(nb_ops == 0)) + return 0; + + vq = vhost_crypto_finalize_one_request(ops[0], NULL); + if (unlikely(vq == NULL)) + return 0; + tmp_vq = vq; + + while ((processed < nb_ops)) { + tmp_vq = vhost_crypto_finalize_one_request(ops[processed], + tmp_vq); + + if (unlikely(vq != tmp_vq)) + break; + + processed++; + } + + *callfd = vq->callfd; + + *(volatile uint16_t *)&vq->used->idx += processed; + + return processed; +} + +int __rte_experimental +rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, + struct rte_mempool *sess_pool, int socket_id) +{ + struct virtio_net *dev = get_device(vid); + struct rte_hash_parameters params = {0}; + struct vhost_crypto *vcrypto; + char name[128]; + int ret; + + if (!dev) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + ret = rte_vhost_driver_set_features(dev->ifname, + VIRTIO_CRYPTO_FEATURES); + if (ret < 0) { + VC_LOG_ERR("Error setting features"); + return -1; + } + + vcrypto = 
rte_zmalloc_socket(NULL, sizeof(*vcrypto), + RTE_CACHE_LINE_SIZE, socket_id); + if (!vcrypto) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } + + vcrypto->sess_pool = sess_pool; + vcrypto->cid = cryptodev_id; + vcrypto->cache_session_id = UINT64_MAX; + vcrypto->last_session_id = 1; + vcrypto->dev = dev; + vcrypto->option = RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE; + + snprintf(name, 127, "HASH_VHOST_CRYPT_%u", (uint32_t)vid); + params.name = name; + params.entries = VHOST_CRYPTO_SESSION_MAP_ENTRIES; + params.hash_func = rte_jhash; + params.key_len = sizeof(uint64_t); + params.socket_id = socket_id; + vcrypto->session_map = rte_hash_create(¶ms); + if (!vcrypto->session_map) { + VC_LOG_ERR("Failed to creath session map"); + ret = -ENOMEM; + goto error_exit; + } + + snprintf(name, 127, "MBUF_POOL_VM_%u", (uint32_t)vid); + vcrypto->mbuf_pool = rte_pktmbuf_pool_create(name, + VHOST_CRYPTO_MBUF_POOL_SIZE, 512, + sizeof(struct vhost_crypto_data_req), + RTE_MBUF_DEFAULT_DATAROOM * 2 + RTE_PKTMBUF_HEADROOM, + rte_socket_id()); + if (!vcrypto->mbuf_pool) { + VC_LOG_ERR("Failed to creath mbuf pool"); + ret = -ENOMEM; + goto error_exit; + } + + dev->extern_data = vcrypto; + dev->extern_ops.pre_msg_handle = NULL; + dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler; + + return 0; + +error_exit: + if (vcrypto->session_map) + rte_hash_free(vcrypto->session_map); + if (vcrypto->mbuf_pool) + rte_mempool_free(vcrypto->mbuf_pool); + + rte_free(vcrypto); + + return ret; +} + +int __rte_experimental +rte_vhost_crypto_free(int vid) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + vcrypto = dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + rte_hash_free(vcrypto->session_map); + rte_mempool_free(vcrypto->mbuf_pool); + rte_free(vcrypto); + + dev->extern_data = NULL; + dev->extern_ops.pre_msg_handle = NULL; + dev->extern_ops.post_msg_handle = NULL; + + return 0; +} + +int __rte_experimental +rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option) +{ + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + if (unlikely((uint32_t)option >= + RTE_VHOST_CRYPTO_MAX_ZERO_COPY_OPTIONS)) { + VC_LOG_ERR("Invalid option %i", option); + return -EINVAL; + } + + vcrypto = (struct vhost_crypto *)dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + if (vcrypto->option == (uint8_t)option) + return 0; + + if (!(rte_mempool_full(vcrypto->mbuf_pool))) { + VC_LOG_ERR("Cannot update zero copy as mempool is not full"); + return -EINVAL; + } + + vcrypto->option = (uint8_t)option; + + return 0; +} + +uint16_t __rte_experimental +rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + struct rte_mbuf *mbufs[VHOST_CRYPTO_MAX_BURST_SIZE * 2]; + struct virtio_net *dev = get_device(vid); + struct vhost_crypto *vcrypto; + struct vhost_virtqueue *vq; + uint16_t avail_idx; + uint16_t start_idx; + uint16_t required; + uint16_t count; + uint16_t i; + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); + return -EINVAL; + } + + if (unlikely(qid >= VHOST_MAX_QUEUE_PAIRS)) { + VC_LOG_ERR("Invalid qid %u", qid); + return -EINVAL; 
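
The fetch path further below computes its burst size from 16-bit ring indices; a standalone sketch of that modulo-2^16 arithmetic, with invented sample values:

#include <stdint.h>
#include <stdio.h>

#define MAX_BURST 32

static uint16_t min16(uint16_t a, uint16_t b) { return a < b ? a : b; }

int main(void)
{
        uint16_t avail_idx = 5, last_used = 65530, nb_ops = 8;
        /* 5 - 65530 wraps around to 11 pending entries. */
        uint16_t count = (uint16_t)(avail_idx - last_used);

        count = min16(count, MAX_BURST);   /* clamp to the burst limit */
        count = min16(count, nb_ops);      /* clamp to the caller's array */
        printf("count=%u\n", count);       /* count=8 */
        return 0;
}
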
+ } + + vcrypto = (struct vhost_crypto *)dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot find required data, is it initialized?"); + return -ENOENT; + } + + vq = dev->virtqueue[qid]; + + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + start_idx = vq->last_used_idx; + count = avail_idx - start_idx; + count = RTE_MIN(count, VHOST_CRYPTO_MAX_BURST_SIZE); + count = RTE_MIN(count, nb_ops); + + if (unlikely(count == 0)) + return 0; + + /* for zero copy, we need 2 empty mbufs for src and dst, otherwise + * we need only 1 mbuf as src and dst + */ + required = count * 2; + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs, + required) < 0)) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + uint16_t used_idx = (start_idx + i) & (vq->size - 1); + uint16_t desc_idx = vq->avail->ring[used_idx]; + struct vring_desc *head = &vq->desc[desc_idx]; + struct rte_crypto_op *op = ops[i]; + + op->sym->m_src = mbufs[i * 2]; + op->sym->m_dst = mbufs[i * 2 + 1]; + op->sym->m_src->data_off = 0; + op->sym->m_dst->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head, + desc_idx)) < 0) + break; + } + + vq->last_used_idx += i; + + return i; +} + +uint16_t __rte_experimental +rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds) +{ + struct rte_crypto_op **tmp_ops = ops; + uint16_t count = 0, left = nb_ops; + int callfd; + uint16_t idx = 0; + + while (left) { + count = vhost_crypto_complete_one_vm_requests(tmp_ops, left, + &callfd); + if (unlikely(count == 0)) + break; + + tmp_ops = &tmp_ops[count]; + left -= count; + + callfds[idx++] = callfd; + + if (unlikely(idx >= VIRTIO_CRYPTO_MAX_NUM_BURST_VQS)) { + VC_LOG_ERR("Too many vqs"); + break; + } + } + + *nb_callfds = idx; + + return nb_ops - left; +} diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 90ed2112..a2d4c9ff 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -1,5 +1,22 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation + */ + +/* Security model + * -------------- + * The vhost-user protocol connection is an external interface, so it must be + * robust against invalid inputs. + * + * This is important because the vhost-user master is only one step removed + * from the guest. Malicious guests that have escaped will then launch further + * attacks from the vhost-user master. + * + * Even in deployments where guests are trusted, a bug in the vhost-user master + * can still cause invalid messages to be sent. Such messages must not + * compromise the stability of the DPDK application by causing crashes, memory + * corruption, or other problematic behavior. + * + * Do not assume received VhostUserMsg fields contain sensible values! 
*/ #include <stdint.h> @@ -50,6 +67,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU", [VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD", [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", + [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS", + [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS", }; static uint64_t @@ -116,10 +135,7 @@ vhost_user_set_owner(void) static int vhost_user_reset_owner(struct virtio_net *dev) { - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } + vhost_destroy_device_notify(dev); cleanup_device(dev, 0); reset_device(dev); @@ -139,12 +155,26 @@ vhost_user_get_features(struct virtio_net *dev) } /* + * The queue number that we support are requested. + */ +static uint32_t +vhost_user_get_queue_num(struct virtio_net *dev) +{ + uint32_t queue_num = 0; + + rte_vhost_driver_get_queue_num(dev->ifname, &queue_num); + return queue_num; +} + +/* * We receive the negotiated features supported by us and the virtio device. */ static int vhost_user_set_features(struct virtio_net *dev, uint64_t features) { uint64_t vhost_features = 0; + struct rte_vdpa_device *vdpa_dev; + int did = -1; rte_vhost_driver_get_features(dev->ifname, &vhost_features); if (features & ~vhost_features) { @@ -181,7 +211,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) } else { dev->vhost_hlen = sizeof(struct virtio_net_hdr); } - LOG_DEBUG(VHOST_CONFIG, + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mergeable RX buffers %s, virtio 1 %s\n", dev->vid, (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", @@ -203,10 +233,15 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) dev->virtqueue[dev->nr_vring] = NULL; cleanup_vq(vq, 1); - free_vq(vq); + free_vq(dev, vq); } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_features) + vdpa_dev->ops->set_features(dev->vid); + return 0; } @@ -221,6 +256,17 @@ vhost_user_set_vring_num(struct virtio_net *dev, vq->size = msg->payload.state.num; + /* VIRTIO 1.0, 2.4 Virtqueues says: + * + * Queue Size value is always a power of 2. The maximum Queue Size + * value is 32768. 
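
The check that follows enforces this rule with the usual bit trick; a standalone sketch (the explicit zero test is added only for this toy helper):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool valid_queue_size(uint32_t n)
{
        /* n & (n - 1) clears the lowest set bit, so it is zero only when
         * at most one bit is set, i.e. n is a power of two. */
        return n != 0 && (n & (n - 1)) == 0 && n <= 32768;
}

int main(void)
{
        printf("%d %d %d\n", valid_queue_size(256),
               valid_queue_size(300), valid_queue_size(65536));  /* 1 0 0 */
        return 0;
}
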
+ */ + if ((vq->size & (vq->size - 1)) || vq->size > 32768) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid virtqueue size %u\n", vq->size); + return -1; + } + if (dev->dequeue_zero_copy) { vq->nr_zmbuf = 0; vq->last_zmbuf_idx = 0; @@ -236,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev, TAILQ_INIT(&vq->zmbuf_list); } - vq->shadow_used_ring = rte_malloc(NULL, + if (vq_is_packed(dev)) { + vq->shadow_used_packed = rte_malloc(NULL, + vq->size * + sizeof(struct vring_used_elem_packed), + RTE_CACHE_LINE_SIZE); + if (!vq->shadow_used_packed) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } + + } else { + vq->shadow_used_split = rte_malloc(NULL, vq->size * sizeof(struct vring_used_elem), RTE_CACHE_LINE_SIZE); - if (!vq->shadow_used_ring) { - RTE_LOG(ERR, VHOST_CONFIG, - "failed to allocate memory for shadow used ring.\n"); - return -1; + if (!vq->shadow_used_split) { + RTE_LOG(ERR, VHOST_CONFIG, + "failed to allocate memory for shadow used ring.\n"); + return -1; + } } vq->batch_copy_elems = rte_malloc(NULL, @@ -269,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index) struct virtio_net *old_dev; struct vhost_virtqueue *old_vq, *vq; struct zcopy_mbuf *new_zmbuf; - struct vring_used_elem *new_shadow_used_ring; + struct vring_used_elem *new_shadow_used_split; + struct vring_used_elem_packed *new_shadow_used_packed; struct batch_copy_elem *new_batch_copy_elems; int ret; @@ -304,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index) vq->zmbufs = new_zmbuf; } - new_shadow_used_ring = rte_malloc_socket(NULL, - vq->size * sizeof(struct vring_used_elem), - RTE_CACHE_LINE_SIZE, - newnode); - if (new_shadow_used_ring) { - rte_free(vq->shadow_used_ring); - vq->shadow_used_ring = new_shadow_used_ring; + if (vq_is_packed(dev)) { + new_shadow_used_packed = rte_malloc_socket(NULL, + vq->size * + sizeof(struct vring_used_elem_packed), + RTE_CACHE_LINE_SIZE, + newnode); + if (new_shadow_used_packed) { + rte_free(vq->shadow_used_packed); + vq->shadow_used_packed = new_shadow_used_packed; + } + } else { + new_shadow_used_split = rte_malloc_socket(NULL, + vq->size * + sizeof(struct vring_used_elem), + RTE_CACHE_LINE_SIZE, + newnode); + if (new_shadow_used_split) { + rte_free(vq->shadow_used_split); + vq->shadow_used_split = new_shadow_used_split; + } } new_batch_copy_elems = rte_malloc_socket(NULL, @@ -366,21 +439,26 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused) /* Converts QEMU virtual address to Vhost virtual address. */ static uint64_t -qva_to_vva(struct virtio_net *dev, uint64_t qva) +qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) { - struct rte_vhost_mem_region *reg; + struct rte_vhost_mem_region *r; uint32_t i; /* Find the region where the address lives. 
*/ for (i = 0; i < dev->mem->nregions; i++) { - reg = &dev->mem->regions[i]; + r = &dev->mem->regions[i]; + + if (qva >= r->guest_user_addr && + qva < r->guest_user_addr + r->size) { - if (qva >= reg->guest_user_addr && - qva < reg->guest_user_addr + reg->size) { - return qva - reg->guest_user_addr + - reg->host_user_addr; + if (unlikely(*len > r->guest_user_addr + r->size - qva)) + *len = r->guest_user_addr + r->size - qva; + + return qva - r->guest_user_addr + + r->host_user_addr; } } + *len = 0; return 0; } @@ -393,20 +471,20 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva) */ static uint64_t ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint64_t ra, uint64_t size) + uint64_t ra, uint64_t *size) { if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { uint64_t vva; vva = vhost_user_iotlb_cache_find(vq, ra, - &size, VHOST_ACCESS_RW); + size, VHOST_ACCESS_RW); if (!vva) vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW); return vva; } - return qva_to_vva(dev, ra); + return qva_to_vva(dev, ra, size); } static struct virtio_net * @@ -414,16 +492,63 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) { struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; struct vhost_vring_addr *addr = &vq->ring_addrs; + uint64_t len; + + if (vq_is_packed(dev)) { + len = sizeof(struct vring_packed_desc) * vq->size; + vq->desc_packed = (struct vring_packed_desc *)(uintptr_t) + ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len); + vq->log_guest_addr = 0; + if (vq->desc_packed == NULL || + len != sizeof(struct vring_packed_desc) * + vq->size) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to map desc_packed ring.\n", + dev->vid); + return dev; + } + + dev = numa_realloc(dev, vq_index); + vq = dev->virtqueue[vq_index]; + addr = &vq->ring_addrs; + + len = sizeof(struct vring_packed_desc_event); + vq->driver_event = (struct vring_packed_desc_event *) + (uintptr_t)ring_addr_to_vva(dev, + vq, addr->avail_user_addr, &len); + if (vq->driver_event == NULL || + len != sizeof(struct vring_packed_desc_event)) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to find driver area address.\n", + dev->vid); + return dev; + } + + len = sizeof(struct vring_packed_desc_event); + vq->device_event = (struct vring_packed_desc_event *) + (uintptr_t)ring_addr_to_vva(dev, + vq, addr->used_user_addr, &len); + if (vq->device_event == NULL || + len != sizeof(struct vring_packed_desc_event)) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to find device area address.\n", + dev->vid); + return dev; + } + + return dev; + } /* The addresses are converted from QEMU virtual to Vhost virtual. 
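
A standalone sketch of the region lookup with length clamping performed by qva_to_vva() above, using toy region types and invented addresses:

#include <stdint.h>
#include <stdio.h>

struct toy_region { uint64_t guest_addr, host_addr, size; };

static uint64_t toy_translate(const struct toy_region *r, int n,
                              uint64_t qva, uint64_t *len)
{
        for (int i = 0; i < n; i++) {
                if (qva >= r[i].guest_addr &&
                    qva < r[i].guest_addr + r[i].size) {
                        uint64_t left = r[i].guest_addr + r[i].size - qva;

                        if (*len > left)
                                *len = left;   /* clamp to the region end */
                        return qva - r[i].guest_addr + r[i].host_addr;
                }
        }
        *len = 0;
        return 0;                              /* address not mapped */
}

int main(void)
{
        struct toy_region regions[1] = { { 0x1000, 0x70000000, 0x2000 } };
        uint64_t len = 0x4000;
        uint64_t hva = toy_translate(regions, 1, 0x1800, &len);

        printf("hva=%#llx covered=%#llx\n",
               (unsigned long long)hva, (unsigned long long)len);
        /* hva=0x70000800 covered=0x1800: only 0x1800 bytes fit in the region */
        return 0;
}
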
*/ if (vq->desc && vq->avail && vq->used) return dev; + len = sizeof(struct vring_desc) * vq->size; vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->desc_user_addr, sizeof(struct vring_desc)); - if (vq->desc == 0) { + vq, addr->desc_user_addr, &len); + if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find desc ring address.\n", + "(%d) failed to map desc ring.\n", dev->vid); return dev; } @@ -432,20 +557,26 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) vq = dev->virtqueue[vq_index]; addr = &vq->ring_addrs; + len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->avail_user_addr, sizeof(struct vring_avail)); - if (vq->avail == 0) { + vq, addr->avail_user_addr, &len); + if (vq->avail == 0 || + len != sizeof(struct vring_avail) + + sizeof(uint16_t) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find avail ring address.\n", + "(%d) failed to map avail ring.\n", dev->vid); return dev; } + len = sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size; vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, - vq, addr->used_user_addr, sizeof(struct vring_used)); - if (vq->used == 0) { + vq, addr->used_user_addr, &len); + if (vq->used == 0 || len != sizeof(struct vring_used) + + sizeof(struct vring_used_elem) * vq->size) { RTE_LOG(DEBUG, VHOST_CONFIG, - "(%d) failed to find used ring address.\n", + "(%d) failed to map used ring.\n", dev->vid); return dev; } @@ -461,13 +592,13 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) vq->log_guest_addr = addr->log_guest_addr; - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", dev->vid, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", dev->vid, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", dev->vid, vq->log_guest_addr); return dev; @@ -500,7 +631,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) if (vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { - dev = translate_ring_addresses(dev, msg->payload.state.index); + dev = translate_ring_addresses(dev, msg->payload.addr.index); if (!dev) return -1; @@ -525,7 +656,7 @@ vhost_user_set_vring_base(struct virtio_net *dev, return 0; } -static void +static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t host_phys_addr, uint64_t size) { @@ -535,6 +666,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, dev->max_guest_pages *= 2; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); + if (!dev->guest_pages) { + RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + return -1; + } } if (dev->nr_guest_pages > 0) { @@ -543,7 +678,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (host_phys_addr == last_page->host_phys_addr + last_page->size) { last_page->size += size; - return; + return 0; } } @@ -551,9 +686,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, 
page->guest_phys_addr = guest_phys_addr; page->host_phys_addr = host_phys_addr; page->size = size; + + return 0; } -static void +static int add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t page_size) { @@ -567,7 +704,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) + return -1; + host_user_addr += size; guest_phys_addr += size; reg_size -= size; @@ -576,12 +715,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = RTE_MIN(reg_size, page_size); host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + size) < 0) + return -1; host_user_addr += size; guest_phys_addr += size; reg_size -= size; } + + return 0; } #ifdef RTE_LIBRTE_VHOST_DEBUG @@ -635,8 +778,9 @@ vhost_memory_changed(struct VhostUserMemory *new, } static int -vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) +vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) { + struct virtio_net *dev = *pdev; struct VhostUserMemory memory = pmsg->payload.memory; struct rte_vhost_mem_region *reg; void *mmap_addr; @@ -644,8 +788,15 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) uint64_t mmap_offset; uint64_t alignment; uint32_t i; + int populate; int fd; + if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) { + RTE_LOG(ERR, VHOST_CONFIG, + "too many memory regions (%u)\n", memory.nregions); + return -1; + } + if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) { RTE_LOG(INFO, VHOST_CONFIG, "(%d) memory regions not changed\n", dev->vid); @@ -662,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) dev->mem = NULL; } + /* Flush IOTLB cache as previous HVAs are now invalid */ + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + for (i = 0; i < dev->nr_vring; i++) + vhost_user_iotlb_flush_all(dev->virtqueue[i]); + dev->nr_guest_pages = 0; if (!dev->guest_pages) { dev->max_guest_pages = 8; @@ -696,7 +852,17 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) reg->fd = fd; mmap_offset = memory.regions[i].mmap_offset; - mmap_size = reg->size + mmap_offset; + + /* Check for memory_size + mmap_offset overflow */ + if (mmap_offset >= -reg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap_offset (%#"PRIx64") and memory_size " + "(%#"PRIx64") overflow\n", + mmap_offset, reg->size); + goto err_mmap; + } + + mmap_size = reg->size + mmap_offset; /* mmap() without flag of MAP_ANONYMOUS, should be called * with length argument aligned with hugepagesz at older @@ -714,8 +880,9 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) } mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); + populate = (dev->dequeue_zero_copy) ? 
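
The mmap_offset check above ("mmap_offset >= -reg->size") is an unsigned-overflow test; a standalone sketch, assuming a non-zero region size:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool offset_overflows(uint64_t size, uint64_t offset)
{
        /* In 64-bit modular arithmetic, -size equals 2^64 - size, so
         * offset >= -size exactly when offset + size would wrap past 2^64. */
        return offset >= -size;
}

int main(void)
{
        printf("%d\n", offset_overflows(0x1000, 0x2000));             /* 0 */
        printf("%d\n", offset_overflows(0x1000, UINT64_MAX - 0x10));  /* 1 */
        return 0;
}
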
MAP_POPULATE : 0; mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, 0); + MAP_SHARED | populate, fd, 0); if (mmap_addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, @@ -729,7 +896,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset; if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); + if (add_guest_pages(dev, reg, alignment) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "adding guest pages to region %u failed.\n", + i); + goto err_mmap; + } RTE_LOG(INFO, VHOST_CONFIG, "guest memory region %u, size: 0x%" PRIx64 "\n" @@ -750,6 +922,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset); } + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + + if (vq->desc || vq->avail || vq->used) { + /* + * If the memory table got updated, the ring addresses + * need to be translated again as virtual addresses have + * changed. + */ + vring_invalidate(dev, vq); + + dev = translate_ring_addresses(dev, i); + if (!dev) + return -1; + + *pdev = dev; + } + } + dump_guest_pages(dev); return 0; @@ -761,10 +952,20 @@ err_mmap: return -1; } -static int -vq_is_ready(struct vhost_virtqueue *vq) +static bool +vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq) { - return vq && vq->desc && vq->avail && vq->used && + bool rings_ok; + + if (!vq) + return false; + + if (vq_is_packed(dev)) + rings_ok = !!vq->desc_packed; + else + rings_ok = vq->desc && vq->avail && vq->used; + + return rings_ok && vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; } @@ -781,7 +982,7 @@ virtio_is_ready(struct virtio_net *dev) for (i = 0; i < dev->nr_vring; i++) { vq = dev->virtqueue[i]; - if (!vq_is_ready(vq)) + if (!vq_is_ready(dev, vq)) return 0; } @@ -874,15 +1075,13 @@ vhost_user_get_vring_base(struct virtio_net *dev, struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; /* We have to stop the queue (virtio) if it is running. 
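
A standalone sketch of the conditional prefault used for the guest memory mapping above: zero-copy needs the pages resident, so MAP_POPULATE is OR-ed in only for that mode. MAP_POPULATE is Linux-specific, and a temporary file stands in for the guest memory fd here:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int zero_copy = 1;
        size_t size = 2 * 1024 * 1024;
        FILE *f = tmpfile();

        if (!f || ftruncate(fileno(f), size) < 0)
                return 1;

        int populate = zero_copy ? MAP_POPULATE : 0;
        void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_SHARED | populate, fileno(f), 0);
        if (addr == MAP_FAILED)
                return 1;

        printf("mapped %zu bytes at %p (prefaulted: %d)\n",
               size, addr, zero_copy);
        munmap(addr, size);
        fclose(f);
        return 0;
}
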
*/ - if (dev->flags & VIRTIO_DEV_RUNNING) { - dev->flags &= ~VIRTIO_DEV_RUNNING; - dev->notify_ops->destroy_device(dev->vid); - } + vhost_destroy_device_notify(dev); dev->flags &= ~VIRTIO_DEV_READY; + dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; - /* Here we are safe to get the last used index */ - msg->payload.state.num = vq->last_used_idx; + /* Here we are safe to get the last avail index */ + msg->payload.state.num = vq->last_avail_idx; RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, @@ -897,10 +1096,20 @@ vhost_user_get_vring_base(struct virtio_net *dev, vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (dev->dequeue_zero_copy) free_zmbufs(vq); - rte_free(vq->shadow_used_ring); - vq->shadow_used_ring = NULL; + if (vq_is_packed(dev)) { + rte_free(vq->shadow_used_packed); + vq->shadow_used_packed = NULL; + } else { + rte_free(vq->shadow_used_split); + vq->shadow_used_split = NULL; + } rte_free(vq->batch_copy_elems); vq->batch_copy_elems = NULL; @@ -917,16 +1126,24 @@ vhost_user_set_vring_enable(struct virtio_net *dev, VhostUserMsg *msg) { int enable = (int)msg->payload.state.num; + int index = (int)msg->payload.state.index; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(INFO, VHOST_CONFIG, "set queue enable: %d to qp idx: %d\n", - enable, msg->payload.state.index); + enable, index); + + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->set_vring_state) + vdpa_dev->ops->set_vring_state(dev->vid, index, enable); if (dev->notify_ops->vring_state_changed) dev->notify_ops->vring_state_changed(dev->vid, - msg->payload.state.index, enable); + index, enable); - dev->virtqueue[msg->payload.state.index]->enabled = enable; + dev->virtqueue[index]->enabled = enable; return 0; } @@ -935,9 +1152,10 @@ static void vhost_user_get_protocol_features(struct virtio_net *dev, struct VhostUserMsg *msg) { - uint64_t features, protocol_features = VHOST_USER_PROTOCOL_FEATURES; + uint64_t features, protocol_features; rte_vhost_driver_get_features(dev->ifname, &features); + rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features); /* * REPLY_ACK protocol feature is only mandatory for now @@ -983,6 +1201,15 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) size = msg->payload.log.mmap_size; off = msg->payload.log.mmap_offset; + + /* Don't allow mmap_offset to point outside the mmap region */ + if (off > size) { + RTE_LOG(ERR, VHOST_CONFIG, + "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", + off, size); + return -1; + } + RTE_LOG(INFO, VHOST_CONFIG, "log mmap size: %"PRId64", offset: %"PRId64"\n", size, off); @@ -991,7 +1218,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * mmap from 0 to workaround a hugepage mmap bug: mmap will * fail when offset is not page size aligned. 
*/ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); @@ -1024,6 +1251,8 @@ static int vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) { uint8_t *mac = (uint8_t *)&msg->payload.u64; + struct rte_vdpa_device *vdpa_dev; + int did = -1; RTE_LOG(DEBUG, VHOST_CONFIG, ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", @@ -1039,6 +1268,10 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) */ rte_smp_wmb(); rte_atomic16_set(&dev->broadcast_rarp, 1); + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && vdpa_dev->ops->migration_done) + vdpa_dev->ops->migration_done(dev->vid); return 0; } @@ -1131,11 +1364,12 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) struct virtio_net *dev = *pdev; struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; uint16_t i; - uint64_t vva; + uint64_t vva, len; switch (imsg->type) { case VHOST_IOTLB_UPDATE: - vva = qva_to_vva(dev, imsg->uaddr); + len = imsg->size; + vva = qva_to_vva(dev, imsg->uaddr, &len); if (!vva) return -1; @@ -1143,7 +1377,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) struct vhost_virtqueue *vq = dev->virtqueue[i]; vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, - imsg->size, imsg->perm); + len, imsg->perm); if (is_vring_iotlb_update(vq, imsg)) *pdev = dev = translate_ring_addresses(dev, i); @@ -1200,13 +1434,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) } static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg) +send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num) { if (!msg) return 0; return send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + VHOST_USER_HDR_SIZE + msg->size, fds, fd_num); } static int @@ -1220,7 +1454,23 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg) msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; - return send_vhost_message(sockfd, msg); + return send_vhost_message(sockfd, msg, NULL, 0); +} + +static int +send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg, + int *fds, int fd_num) +{ + int ret; + + if (msg->flags & VHOST_USER_NEED_REPLY) + rte_spinlock_lock(&dev->slave_req_lock); + + ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num); + if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY)) + rte_spinlock_unlock(&dev->slave_req_lock); + + return ret; } /* @@ -1300,8 +1550,11 @@ vhost_user_msg_handler(int vid, int fd) { struct virtio_net *dev; struct VhostUserMsg msg; + struct rte_vdpa_device *vdpa_dev; + int did = -1; int ret; int unlock_required = 0; + uint32_t skip_master = 0; dev = get_device(vid); if (dev == NULL) @@ -1379,6 +1632,21 @@ vhost_user_msg_handler(int vid, int fd) } + if (dev->extern_ops.pre_msg_handle) { + uint32_t need_reply; + + ret = (*dev->extern_ops.pre_msg_handle)(dev->vid, + (void *)&msg, &need_reply, &skip_master); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); + + if (skip_master) + goto skip_to_post_handle; + } + switch (msg.request.master) { case VHOST_USER_GET_FEATURES: msg.payload.u64 = vhost_user_get_features(dev); @@ -1407,7 +1675,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_SET_MEM_TABLE: - ret = vhost_user_set_mem_table(dev, &msg); + ret = vhost_user_set_mem_table(&dev, &msg); break; 
case VHOST_USER_SET_LOG_BASE: @@ -1452,7 +1720,7 @@ vhost_user_msg_handler(int vid, int fd) break; case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev); msg.size = sizeof(msg.payload.u64); send_vhost_reply(fd, &msg); break; @@ -1479,9 +1747,22 @@ vhost_user_msg_handler(int vid, int fd) default: ret = -1; break; + } + +skip_to_post_handle: + if (dev->extern_ops.post_msg_handle) { + uint32_t need_reply; + ret = (*dev->extern_ops.post_msg_handle)( + dev->vid, (void *)&msg, &need_reply); + if (ret < 0) + goto skip_to_reply; + + if (need_reply) + send_vhost_reply(fd, &msg); } +skip_to_reply: if (unlock_required) vhost_user_unlock_all_queue_pairs(dev); @@ -1505,9 +1786,53 @@ vhost_user_msg_handler(int vid, int fd) } } + did = dev->vdpa_dev_id; + vdpa_dev = rte_vdpa_get_device(did); + if (vdpa_dev && virtio_is_ready(dev) && + !(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) && + msg.request.master == VHOST_USER_SET_VRING_ENABLE) { + if (vdpa_dev->ops->dev_conf) + vdpa_dev->ops->dev_conf(dev->vid); + dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) { + RTE_LOG(INFO, VHOST_CONFIG, + "(%d) software relay is used for vDPA, performance may be low.\n", + dev->vid); + } + } + return 0; } +static int process_slave_message_reply(struct virtio_net *dev, + const VhostUserMsg *msg) +{ + VhostUserMsg msg_reply; + int ret; + + if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) + return 0; + + if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) { + ret = -1; + goto out; + } + + if (msg_reply.request.slave != msg->request.slave) { + RTE_LOG(ERR, VHOST_CONFIG, + "Received unexpected msg type (%u), expected %u\n", + msg_reply.request.slave, msg->request.slave); + ret = -1; + goto out; + } + + ret = msg_reply.payload.u64 ? 
-1 : 0; + +out: + rte_spinlock_unlock(&dev->slave_req_lock); + return ret; +} + int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) { @@ -1523,7 +1848,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) }, }; - ret = send_vhost_message(dev->slave_req_fd, &msg); + ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to send IOTLB miss message (%d)\n", @@ -1533,3 +1858,101 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) return 0; } + +static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, + int index, int fd, + uint64_t offset, + uint64_t size) +{ + int *fdp = NULL; + size_t fd_num = 0; + int ret; + struct VhostUserMsg msg = { + .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY, + .size = sizeof(msg.payload.area), + .payload.area = { + .u64 = index & VHOST_USER_VRING_IDX_MASK, + .size = size, + .offset = offset, + }, + }; + + if (fd < 0) + msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; + else { + fdp = &fd; + fd_num = 1; + } + + ret = send_vhost_slave_message(dev, &msg, fdp, fd_num); + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to set host notifier (%d)\n", ret); + return ret; + } + + return process_slave_message_reply(dev, &msg); +} + +int vhost_user_host_notifier_ctrl(int vid, bool enable) +{ + struct virtio_net *dev; + struct rte_vdpa_device *vdpa_dev; + int vfio_device_fd, did, ret = 0; + uint64_t offset, size; + unsigned int i; + + dev = get_device(vid); + if (!dev) + return -ENODEV; + + did = dev->vdpa_dev_id; + if (did < 0) + return -EINVAL; + + if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) || + !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) || + !(dev->protocol_features & + (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))) + return -ENOTSUP; + + vdpa_dev = rte_vdpa_get_device(did); + if (!vdpa_dev) + return -ENODEV; + + RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP); + + vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid); + if (vfio_device_fd < 0) + return -ENOTSUP; + + if (enable) { + for (i = 0; i < dev->nr_vring; i++) { + if (vdpa_dev->ops->get_notify_area(vid, i, &offset, + &size) < 0) { + ret = -ENOTSUP; + goto disable; + } + + if (vhost_user_slave_set_vring_host_notifier(dev, i, + vfio_device_fd, offset, size) < 0) { + ret = -EFAULT; + goto disable; + } + } + } else { +disable: + for (i = 0; i < dev->nr_vring; i++) { + vhost_user_slave_set_vring_host_notifier(dev, i, -1, + 0, 0); + } + } + + return ret; +} diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index d4bd604b..42166adf 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _VHOST_NET_USER_H @@ -14,19 +14,15 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 -#define VHOST_USER_PROTOCOL_F_MQ 0 -#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 -#define VHOST_USER_PROTOCOL_F_RARP 2 -#define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 -#define VHOST_USER_PROTOCOL_F_NET_MTU 4 -#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 - #define 
VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ (1ULL << VHOST_USER_PROTOCOL_F_RARP) | \ (1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \ (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \ - (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \ + (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \ + (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -52,12 +48,15 @@ typedef enum VhostUserRequest { VHOST_USER_NET_SET_MTU = 20, VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, - VHOST_USER_MAX + VHOST_USER_CRYPTO_CREATE_SESS = 26, + VHOST_USER_CRYPTO_CLOSE_SESS = 27, + VHOST_USER_MAX = 28 } VhostUserRequest; typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -79,10 +78,40 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; +/* Comply with Cryptodev-Linux */ +#define VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH 512 +#define VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH 64 + +/* Same structure as vhost-user backend session info */ +typedef struct VhostUserCryptoSessionParam { + int64_t session_id; + uint32_t op_code; + uint32_t cipher_algo; + uint32_t cipher_key_len; + uint32_t hash_algo; + uint32_t digest_len; + uint32_t auth_key_len; + uint32_t aad_len; + uint8_t op_type; + uint8_t dir; + uint8_t hash_mode; + uint8_t chaining_dir; + uint8_t *ciphe_key; + uint8_t *auth_key; + uint8_t cipher_key_buf[VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH]; + uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH]; +} VhostUserCryptoSessionParam; + +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + typedef struct VhostUserMsg { union { - VhostUserRequest master; - VhostUserSlaveRequest slave; + uint32_t master; /* a VhostUserRequest value */ + uint32_t slave; /* a VhostUserSlaveRequest value*/ } request; #define VHOST_USER_VERSION_MASK 0x3 @@ -99,6 +128,8 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserCryptoSessionParam crypto_session; + VhostUserVringArea area; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; } __attribute((packed)) VhostUserMsg; @@ -112,6 +143,7 @@ typedef struct VhostUserMsg { /* vhost_user.c */ int vhost_user_msg_handler(int vid, int fd); int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); +int vhost_user_host_notifier_ctrl(int vid, bool enable); /* socket.c */ int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); diff --git a/lib/librte_vhost/virtio_crypto.h b/lib/librte_vhost/virtio_crypto.h new file mode 100644 index 00000000..e3b93573 --- /dev/null +++ b/lib/librte_vhost/virtio_crypto.h @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 HUAWEI TECHNOLOGIES CO., LTD. 
+ */ + +#ifndef _VIRTIO_CRYPTO_H +#define _VIRTIO_CRYPTO_H + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + uint32_t opcode; + uint32_t algo; + uint32_t flag; + /* data virtqueue id */ + uint32_t queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + uint32_t algo; + /* length of key */ + uint32_t keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 +#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + uint64_t session_id; + uint32_t status; + uint32_t padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + uint8_t padding[8]; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define 
VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + uint32_t algo; + /* hash result length */ + uint32_t hash_result_len; + /* length of authenticated key */ + uint32_t auth_key_len; + uint32_t padding; +}; + +struct virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + uint32_t algo; + /* length of key */ + uint32_t key_len; + /* hash result length */ + uint32_t hash_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + uint32_t op; + uint32_t padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; + uint8_t padding[32]; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + uint32_t alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + uint32_t hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + uint8_t padding[16]; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + uint32_t aad_len; + uint32_t padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + uint8_t padding[48]; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* + * Chain any cipher with any hash or mac operation. 
The order + * depends on the value of alg_chain_order param + */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + uint64_t session_id; + uint8_t padding[48]; +}; + +/* The request of the control virtqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req + sym_create_session; + struct virtio_crypto_hash_create_session_req + hash_create_session; + struct virtio_crypto_mac_create_session_req + mac_create_session; + struct virtio_crypto_aead_create_session_req + aead_create_session; + struct virtio_crypto_destroy_session_req + destroy_session; + uint8_t padding[56]; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + uint32_t opcode; + /* algo should be service-specific algorithms */ + uint32_t algo; + /* session_id should be service-specific algorithms */ + uint64_t session_id; + /* control flag to control the request */ + uint32_t flag; + uint32_t padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + uint32_t iv_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; + uint32_t padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + uint32_t src_data_len; + /* hash result length */ + uint32_t hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. 
+ */ + uint32_t iv_len; + /* length of additional auth data */ + uint32_t aad_len; + /* length of source data */ + uint32_t src_data_len; + /* length of dst data */ + uint32_t dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; + uint8_t padding[24]; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; + uint8_t padding[40]; +}; + +struct virtio_crypto_alg_chain_data_para { + uint32_t iv_len; + /* Length of source data */ + uint32_t src_data_len; + /* Length of destination data */ + uint32_t dst_data_len; + /* Starting point for cipher processing in source data */ + uint32_t cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + uint32_t len_to_cipher; + /* Starting point for hash processing in source data */ + uint32_t hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + uint32_t len_to_hash; + /* Length of the additional auth data */ + uint32_t aad_len; + /* Length of the hash result */ + uint32_t hash_result_len; + uint32_t reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + uint8_t padding[40]; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + uint32_t op_type; + uint32_t padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; + uint8_t padding[32]; +}; + +/* The request of the data virtqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + uint8_t padding[48]; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_OP_* above */ + uint32_t status; + + /* + * Maximum number of data queue + */ + uint32_t max_dataqueues; + + /* + * Specifies the services mask which the device support, + * see VIRTIO_CRYPTO_SERVICE_* above + */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + uint32_t reserve; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; +#endif /* _VIRTIO_CRYPTO_H */ diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 700aca7c..99c7afc8 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -16,6 +16,7 @@ #include <rte_sctp.h> #include <rte_arp.h> #include 
<rte_spinlock.h> +#include <rte_malloc.h> #include "iotlb.h" #include "vhost.h" @@ -24,60 +25,174 @@ #define MAX_BATCH_LEN 256 +static __rte_always_inline bool +rxvq_is_mergeable(struct virtio_net *dev) +{ + return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF); +} + static bool is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) { return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; } +static __rte_always_inline void * +alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t desc_addr, uint64_t desc_len) +{ + void *idesc; + uint64_t src, dst; + uint64_t len, remain = desc_len; + + idesc = rte_malloc(__func__, desc_len, 0); + if (unlikely(!idesc)) + return 0; + + dst = (uint64_t)(uintptr_t)idesc; + + while (remain) { + len = remain; + src = vhost_iova_to_vva(dev, vq, desc_addr, &len, + VHOST_ACCESS_RO); + if (unlikely(!src || !len)) { + rte_free(idesc); + return 0; + } + + rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); + + remain -= len; + dst += len; + desc_addr += len; + } + + return idesc; +} + static __rte_always_inline void -do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint16_t to, uint16_t from, uint16_t size) +free_ind_table(void *idesc) +{ + rte_free(idesc); +} + +static __rte_always_inline void +do_flush_shadow_used_ring_split(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint16_t to, uint16_t from, uint16_t size) { rte_memcpy(&vq->used->ring[to], - &vq->shadow_used_ring[from], + &vq->shadow_used_split[from], size * sizeof(struct vring_used_elem)); - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[to]), size * sizeof(struct vring_used_elem)); } static __rte_always_inline void -flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) +flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) { uint16_t used_idx = vq->last_used_idx & (vq->size - 1); if (used_idx + vq->shadow_used_idx <= vq->size) { - do_flush_shadow_used_ring(dev, vq, used_idx, 0, + do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, vq->shadow_used_idx); } else { uint16_t size; /* update used ring interval [used_idx, vq->size] */ size = vq->size - used_idx; - do_flush_shadow_used_ring(dev, vq, used_idx, 0, size); + do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size); /* update the left half used ring interval [0, left_size] */ - do_flush_shadow_used_ring(dev, vq, 0, size, + do_flush_shadow_used_ring_split(dev, vq, 0, size, vq->shadow_used_idx - size); } vq->last_used_idx += vq->shadow_used_idx; rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; + vq->shadow_used_idx = 0; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); } static __rte_always_inline void -update_shadow_used_ring(struct vhost_virtqueue *vq, +update_shadow_used_ring_split(struct vhost_virtqueue *vq, uint16_t desc_idx, uint16_t len) { uint16_t i = vq->shadow_used_idx++; - vq->shadow_used_ring[i].id = desc_idx; - vq->shadow_used_ring[i].len = len; + vq->shadow_used_split[i].id = desc_idx; + vq->shadow_used_split[i].len = len; +} + +static __rte_always_inline void +flush_shadow_used_ring_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq) +{ + int i; + uint16_t used_idx = vq->last_used_idx; + + /* Split loop in two to save memory barriers */ + for (i = 0; i < vq->shadow_used_idx; i++) { + vq->desc_packed[used_idx].id = 
vq->shadow_used_packed[i].id; + vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; + + used_idx += vq->shadow_used_packed[i].count; + if (used_idx >= vq->size) + used_idx -= vq->size; + } + + rte_smp_wmb(); + + for (i = 0; i < vq->shadow_used_idx; i++) { + uint16_t flags; + + if (vq->shadow_used_packed[i].len) + flags = VRING_DESC_F_WRITE; + else + flags = 0; + + if (vq->used_wrap_counter) { + flags |= VRING_DESC_F_USED; + flags |= VRING_DESC_F_AVAIL; + } else { + flags &= ~VRING_DESC_F_USED; + flags &= ~VRING_DESC_F_AVAIL; + } + + vq->desc_packed[vq->last_used_idx].flags = flags; + + vhost_log_cache_used_vring(dev, vq, + vq->last_used_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc)); + + vq->last_used_idx += vq->shadow_used_packed[i].count; + if (vq->last_used_idx >= vq->size) { + vq->used_wrap_counter ^= 1; + vq->last_used_idx -= vq->size; + } + } + + rte_smp_wmb(); + vq->shadow_used_idx = 0; + vhost_log_cache_sync(dev, vq); +} + +static __rte_always_inline void +update_shadow_used_ring_packed(struct vhost_virtqueue *vq, + uint16_t desc_idx, uint16_t len, uint16_t count) +{ + uint16_t i = vq->shadow_used_idx++; + + vq->shadow_used_packed[i].id = desc_idx; + vq->shadow_used_packed[i].len = len; + vq->shadow_used_packed[i].count = count; } static inline void @@ -89,9 +204,11 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) for (i = 0; i < count; i++) { rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); - vhost_log_write(dev, elem[i].log_addr, elem[i].len); + vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len); PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); } + + vq->batch_copy_nb_elems = 0; } static inline void @@ -103,6 +220,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq) for (i = 0; i < count; i++) rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); + + vq->batch_copy_nb_elems = 0; } /* avoid write operation when necessary, to lessen cache issues */ @@ -111,7 +230,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq) (var) = (val); \ } while (0) -static void +static __rte_always_inline void virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) { uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK; @@ -173,273 +292,268 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) } static __rte_always_inline int -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct vring_desc *descs, struct rte_mbuf *m, - uint16_t desc_idx, uint32_t size) +map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct buf_vector *buf_vec, uint16_t *vec_idx, + uint64_t desc_iova, uint64_t desc_len, uint8_t perm) { - uint32_t desc_avail, desc_offset; - uint32_t mbuf_avail, mbuf_offset; - uint32_t cpy_len; - struct vring_desc *desc; - uint64_t desc_addr; - /* A counter to avoid desc dead loop chain */ - uint16_t nr_desc = 1; - struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; - int error = 0; + uint16_t vec_id = *vec_idx; - desc = &descs[desc_idx]; - desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, - desc->len, VHOST_ACCESS_RW); - /* - * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid - * performance issue with some versions of gcc (4.8.4 and 5.3.0) which - * otherwise stores offset on the stack instead of in a register. 
- */ - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) { - error = -1; - goto out; + while (desc_len) { + uint64_t desc_addr; + uint64_t desc_chunck_len = desc_len; + + if (unlikely(vec_id >= BUF_VECTOR_MAX)) + return -1; + + desc_addr = vhost_iova_to_vva(dev, vq, + desc_iova, + &desc_chunck_len, + perm); + if (unlikely(!desc_addr)) + return -1; + + buf_vec[vec_id].buf_iova = desc_iova; + buf_vec[vec_id].buf_addr = desc_addr; + buf_vec[vec_id].buf_len = desc_chunck_len; + + desc_len -= desc_chunck_len; + desc_iova += desc_chunck_len; + vec_id++; } + *vec_idx = vec_id; - rte_prefetch0((void *)(uintptr_t)desc_addr); + return 0; +} - virtio_enqueue_offload(m, (struct virtio_net_hdr *)(uintptr_t)desc_addr); - vhost_log_write(dev, desc->addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0); +static __rte_always_inline int +fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t avail_idx, uint16_t *vec_idx, + struct buf_vector *buf_vec, uint16_t *desc_chain_head, + uint16_t *desc_chain_len, uint8_t perm) +{ + uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; + uint16_t vec_id = *vec_idx; + uint32_t len = 0; + uint64_t dlen; + struct vring_desc *descs = vq->desc; + struct vring_desc *idesc = NULL; - desc_offset = dev->vhost_hlen; - desc_avail = desc->len - dev->vhost_hlen; + *desc_chain_head = idx; - mbuf_avail = rte_pktmbuf_data_len(m); - mbuf_offset = 0; - while (mbuf_avail != 0 || m->next != NULL) { - /* done with current mbuf, fetch next */ - if (mbuf_avail == 0) { - m = m->next; + if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { + dlen = vq->desc[idx].len; + descs = (struct vring_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, + &dlen, + VHOST_ACCESS_RO); + if (unlikely(!descs)) + return -1; - mbuf_offset = 0; - mbuf_avail = rte_pktmbuf_data_len(m); - } + if (unlikely(dlen < vq->desc[idx].len)) { + /* + * The indirect desc table is not contiguous + * in process VA space, we have to copy it. 
+ */ + idesc = alloc_copy_ind_table(dev, vq, + vq->desc[idx].addr, vq->desc[idx].len); + if (unlikely(!idesc)) + return -1; - /* done with current desc buf, fetch next */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) { - /* Room in vring buffer is not enough */ - error = -1; - goto out; - } - if (unlikely(desc->next >= size || ++nr_desc > size)) { - error = -1; - goto out; - } + descs = idesc; + } - desc = &descs[desc->next]; - desc_addr = vhost_iova_to_vva(dev, vq, desc->addr, - desc->len, - VHOST_ACCESS_RW); - if (unlikely(!desc_addr)) { - error = -1; - goto out; - } + idx = 0; + } - desc_offset = 0; - desc_avail = desc->len; + while (1) { + if (unlikely(idx >= vq->size)) { + free_ind_table(idesc); + return -1; } - cpy_len = RTE_MIN(desc_avail, mbuf_avail); - if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { - rte_memcpy((void *)((uintptr_t)(desc_addr + - desc_offset)), - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), - cpy_len); - vhost_log_write(dev, desc->addr + desc_offset, cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - cpy_len, 0); - } else { - batch_copy[copy_nb].dst = - (void *)((uintptr_t)(desc_addr + desc_offset)); - batch_copy[copy_nb].src = - rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); - batch_copy[copy_nb].log_addr = desc->addr + desc_offset; - batch_copy[copy_nb].len = cpy_len; - copy_nb++; + len += descs[idx].len; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[idx].addr, descs[idx].len, + perm))) { + free_ind_table(idesc); + return -1; } - mbuf_avail -= cpy_len; - mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; + if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0) + break; + + idx = descs[idx].next; } -out: - vq->batch_copy_nb_elems = copy_nb; + *desc_chain_len = len; + *vec_idx = vec_id; - return error; + if (unlikely(!!idesc)) + free_ind_table(idesc); + + return 0; } -/** - * This function adds buffers to the virtio devices RX virtqueue. Buffers can - * be received from the physical port or from another virtio device. A packet - * count is returned to indicate the number of packets that are successfully - * added to the RX queue. This function works when the mbuf is scattered, but - * it doesn't support the mergeable feature. 
+/* + * Returns -1 on fail, 0 on success */ -static __rte_always_inline uint32_t -virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, - struct rte_mbuf **pkts, uint32_t count) +static inline int +reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint32_t size, struct buf_vector *buf_vec, + uint16_t *num_buffers, uint16_t avail_head, + uint16_t *nr_vec) { - struct vhost_virtqueue *vq; - uint16_t avail_idx, free_entries, start_idx; - uint16_t desc_indexes[MAX_PKT_BURST]; - struct vring_desc *descs; - uint16_t used_idx; - uint32_t i, sz; + uint16_t cur_idx; + uint16_t vec_idx = 0; + uint16_t max_tries, tries = 0; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } + uint16_t head_idx = 0; + uint16_t len = 0; - vq = dev->virtqueue[queue_id]; + *num_buffers = 0; + cur_idx = vq->last_avail_idx; - rte_spinlock_lock(&vq->access_lock); + if (rxvq_is_mergeable(dev)) + max_tries = vq->size - 1; + else + max_tries = 1; - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; + while (size > 0) { + if (unlikely(cur_idx == avail_head)) + return -1; + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. + */ + if (unlikely(++tries > max_tries)) + return -1; - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); + if (unlikely(fill_vec_buf_split(dev, vq, cur_idx, + &vec_idx, buf_vec, + &head_idx, &len, + VHOST_ACCESS_RW) < 0)) + return -1; + len = RTE_MIN(len, size); + update_shadow_used_ring_split(vq, head_idx, len); + size -= len; - if (unlikely(vq->access_ok == 0)) { - if (unlikely(vring_translate(dev, vq) < 0)) { - count = 0; - goto out; - } + cur_idx++; + *num_buffers += 1; } - avail_idx = *((volatile uint16_t *)&vq->avail->idx); - start_idx = vq->last_used_idx; - free_entries = avail_idx - start_idx; - count = RTE_MIN(count, free_entries); - count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); - if (count == 0) - goto out; + *nr_vec = vec_idx; - LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", - dev->vid, start_idx, start_idx + count); + return 0; +} - vq->batch_copy_nb_elems = 0; +static __rte_always_inline int +fill_vec_buf_packed_indirect(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct vring_packed_desc *desc, uint16_t *vec_idx, + struct buf_vector *buf_vec, uint16_t *len, uint8_t perm) +{ + uint16_t i; + uint32_t nr_descs; + uint16_t vec_id = *vec_idx; + uint64_t dlen; + struct vring_packed_desc *descs, *idescs = NULL; + + dlen = desc->len; + descs = (struct vring_packed_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO); + if (unlikely(!descs)) + return -1; + + if (unlikely(dlen < desc->len)) { + /* + * The indirect desc table is not contiguous + * in process VA space, we have to copy it. + */ + idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len); + if (unlikely(!idescs)) + return -1; - /* Retrieve all of the desc indexes first to avoid caching issues. 
*/ - rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]); - for (i = 0; i < count; i++) { - used_idx = (start_idx + i) & (vq->size - 1); - desc_indexes[i] = vq->avail->ring[used_idx]; - vq->used->ring[used_idx].id = desc_indexes[i]; - vq->used->ring[used_idx].len = pkts[i]->pkt_len + - dev->vhost_hlen; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); + descs = idescs; } - rte_prefetch0(&vq->desc[desc_indexes[0]]); - for (i = 0; i < count; i++) { - uint16_t desc_idx = desc_indexes[i]; - int err; - - if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) { - descs = (struct vring_desc *)(uintptr_t) - vhost_iova_to_vva(dev, - vq, vq->desc[desc_idx].addr, - vq->desc[desc_idx].len, - VHOST_ACCESS_RO); - if (unlikely(!descs)) { - count = i; - break; - } - - desc_idx = 0; - sz = vq->desc[desc_idx].len / sizeof(*descs); - } else { - descs = vq->desc; - sz = vq->size; - } + nr_descs = desc->len / sizeof(struct vring_packed_desc); + if (unlikely(nr_descs >= vq->size)) { + free_ind_table(idescs); + return -1; + } - err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz); - if (unlikely(err)) { - count = i; - break; + for (i = 0; i < nr_descs; i++) { + if (unlikely(vec_id >= BUF_VECTOR_MAX)) { + free_ind_table(idescs); + return -1; } - if (i + 1 < count) - rte_prefetch0(&vq->desc[desc_indexes[i+1]]); + *len += descs[i].len; + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[i].addr, descs[i].len, + perm))) + return -1; } + *vec_idx = vec_id; - do_data_copy_enqueue(dev, vq); - - rte_smp_wmb(); - - *(volatile uint16_t *)&vq->used->idx += count; - vq->last_used_idx += count; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - - vhost_vring_call(dev, vq); -out: - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_unlock(vq); - -out_access_unlock: - rte_spinlock_unlock(&vq->access_lock); + if (unlikely(!!idescs)) + free_ind_table(idescs); - return count; + return 0; } static __rte_always_inline int -fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t avail_idx, uint32_t *vec_idx, - struct buf_vector *buf_vec, uint16_t *desc_chain_head, - uint16_t *desc_chain_len) +fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint16_t avail_idx, uint16_t *desc_count, + struct buf_vector *buf_vec, uint16_t *vec_idx, + uint16_t *buf_id, uint16_t *len, uint8_t perm) { - uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; - uint32_t vec_id = *vec_idx; - uint32_t len = 0; - struct vring_desc *descs = vq->desc; + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t vec_id = *vec_idx; - *desc_chain_head = idx; + if (avail_idx < vq->last_avail_idx) + wrap_counter ^= 1; - if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { - descs = (struct vring_desc *)(uintptr_t) - vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, - vq->desc[idx].len, - VHOST_ACCESS_RO); - if (unlikely(!descs)) - return -1; + if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) + return -1; - idx = 0; - } + *desc_count = 0; while (1) { - if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) + if (unlikely(vec_id >= BUF_VECTOR_MAX)) return -1; - len += descs[idx].len; - buf_vec[vec_id].buf_addr = descs[idx].addr; - buf_vec[vec_id].buf_len = descs[idx].len; - buf_vec[vec_id].desc_idx = idx; - vec_id++; + *desc_count += 1; + *buf_id = descs[avail_idx].id; - if ((descs[idx].flags & 
VRING_DESC_F_NEXT) == 0) + if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) { + if (unlikely(fill_vec_buf_packed_indirect(dev, vq, + &descs[avail_idx], + &vec_id, buf_vec, + len, perm) < 0)) + return -1; + } else { + *len += descs[avail_idx].len; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[avail_idx].addr, + descs[avail_idx].len, + perm))) + return -1; + } + + if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0) break; - idx = descs[idx].next; + if (++avail_idx >= vq->size) { + avail_idx -= vq->size; + wrap_counter ^= 1; + } } - *desc_chain_len = len; *vec_idx = vec_id; return 0; @@ -449,61 +563,74 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq, * Returns -1 on fail, 0 on success */ static inline int -reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, +reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t size, struct buf_vector *buf_vec, - uint16_t *num_buffers, uint16_t avail_head) + uint16_t *nr_vec, uint16_t *num_buffers, + uint16_t *nr_descs) { - uint16_t cur_idx; - uint32_t vec_idx = 0; - uint16_t tries = 0; + uint16_t avail_idx; + uint16_t vec_idx = 0; + uint16_t max_tries, tries = 0; - uint16_t head_idx = 0; + uint16_t buf_id = 0; uint16_t len = 0; + uint16_t desc_count; *num_buffers = 0; - cur_idx = vq->last_avail_idx; + avail_idx = vq->last_avail_idx; + + if (rxvq_is_mergeable(dev)) + max_tries = vq->size - 1; + else + max_tries = 1; while (size > 0) { - if (unlikely(cur_idx == avail_head)) + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. + */ + if (unlikely(++tries > max_tries)) return -1; - if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec, - &head_idx, &len) < 0)) + if (unlikely(fill_vec_buf_packed(dev, vq, + avail_idx, &desc_count, + buf_vec, &vec_idx, + &buf_id, &len, + VHOST_ACCESS_RO) < 0)) return -1; + len = RTE_MIN(len, size); - update_shadow_used_ring(vq, head_idx, len); + update_shadow_used_ring_packed(vq, buf_id, len, desc_count); size -= len; - cur_idx++; - tries++; - *num_buffers += 1; + avail_idx += desc_count; + if (avail_idx >= vq->size) + avail_idx -= vq->size; - /* - * if we tried all available ring items, and still - * can't get enough buf, it means something abnormal - * happened. 
- */ - if (unlikely(tries >= vq->size)) - return -1; + *nr_descs += desc_count; + *num_buffers += 1; } + *nr_vec = vec_idx; + return 0; } static __rte_always_inline int -copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, +copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf *m, struct buf_vector *buf_vec, - uint16_t num_buffers) + uint16_t nr_vec, uint16_t num_buffers) { uint32_t vec_idx = 0; - uint64_t desc_addr; uint32_t mbuf_offset, mbuf_avail; - uint32_t desc_offset, desc_avail; + uint32_t buf_offset, buf_avail; + uint64_t buf_addr, buf_iova, buf_len; uint32_t cpy_len; - uint64_t hdr_addr, hdr_phys_addr; + uint64_t hdr_addr; struct rte_mbuf *hdr_mbuf; struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; + struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL; int error = 0; if (unlikely(m == NULL)) { @@ -511,45 +638,61 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, goto out; } - desc_addr = vhost_iova_to_vva(dev, vq, buf_vec[vec_idx].buf_addr, - buf_vec[vec_idx].buf_len, - VHOST_ACCESS_RW); - if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) { + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + + if (nr_vec > 1) + rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); + + if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { error = -1; goto out; } hdr_mbuf = m; - hdr_addr = desc_addr; - hdr_phys_addr = buf_vec[vec_idx].buf_addr; - rte_prefetch0((void *)(uintptr_t)hdr_addr); + hdr_addr = buf_addr; + if (unlikely(buf_len < dev->vhost_hlen)) + hdr = &tmp_hdr; + else + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; - LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); - desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; + if (unlikely(buf_len < dev->vhost_hlen)) { + buf_offset = dev->vhost_hlen - buf_len; + vec_idx++; + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + buf_avail = buf_len - buf_offset; + } else { + buf_offset = dev->vhost_hlen; + buf_avail = buf_len - dev->vhost_hlen; + } mbuf_avail = rte_pktmbuf_data_len(m); mbuf_offset = 0; while (mbuf_avail != 0 || m->next != NULL) { - /* done with current desc buf, get the next one */ - if (desc_avail == 0) { + /* done with current buf, get the next one */ + if (buf_avail == 0) { vec_idx++; - desc_addr = - vhost_iova_to_vva(dev, vq, - buf_vec[vec_idx].buf_addr, - buf_vec[vec_idx].buf_len, - VHOST_ACCESS_RW); - if (unlikely(!desc_addr)) { + if (unlikely(vec_idx >= nr_vec)) { error = -1; goto out; } - /* Prefetch buffer address. */ - rte_prefetch0((void *)(uintptr_t)desc_addr); - desc_offset = 0; - desc_avail = buf_vec[vec_idx].buf_len; + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + + /* Prefetch next buffer address. 
*/ + if (vec_idx + 1 < nr_vec) + rte_prefetch0((void *)(uintptr_t) + buf_vec[vec_idx + 1].buf_addr); + buf_offset = 0; + buf_avail = buf_len; } /* done with current mbuf, get the next one */ @@ -561,129 +704,221 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, } if (hdr_addr) { - struct virtio_net_hdr_mrg_rxbuf *hdr; - - hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t) - hdr_addr; virtio_enqueue_offload(hdr_mbuf, &hdr->hdr); - ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers); - - vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); - PRINT_PACKET(dev, (uintptr_t)hdr_addr, - dev->vhost_hlen, 0); + if (rxvq_is_mergeable(dev)) + ASSIGN_UNLESS_EQUAL(hdr->num_buffers, + num_buffers); + + if (unlikely(hdr == &tmp_hdr)) { + uint64_t len; + uint64_t remain = dev->vhost_hlen; + uint64_t src = (uint64_t)(uintptr_t)hdr, dst; + uint64_t iova = buf_vec[0].buf_iova; + uint16_t hdr_vec_idx = 0; + + while (remain) { + len = RTE_MIN(remain, + buf_vec[hdr_vec_idx].buf_len); + dst = buf_vec[hdr_vec_idx].buf_addr; + rte_memcpy((void *)(uintptr_t)dst, + (void *)(uintptr_t)src, + len); + + PRINT_PACKET(dev, (uintptr_t)dst, + (uint32_t)len, 0); + vhost_log_cache_write(dev, vq, + iova, len); + + remain -= len; + iova += len; + src += len; + hdr_vec_idx++; + } + } else { + PRINT_PACKET(dev, (uintptr_t)hdr_addr, + dev->vhost_hlen, 0); + vhost_log_cache_write(dev, vq, + buf_vec[0].buf_iova, + dev->vhost_hlen); + } hdr_addr = 0; } - cpy_len = RTE_MIN(desc_avail, mbuf_avail); + cpy_len = RTE_MIN(buf_avail, mbuf_avail); - if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) { - rte_memcpy((void *)((uintptr_t)(desc_addr + - desc_offset)), + if (likely(cpy_len > MAX_BATCH_LEN || + vq->batch_copy_nb_elems >= vq->size)) { + rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, - buf_vec[vec_idx].buf_addr + desc_offset, - cpy_len); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + vhost_log_cache_write(dev, vq, buf_iova + buf_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), cpy_len, 0); } else { - batch_copy[copy_nb].dst = - (void *)((uintptr_t)(desc_addr + desc_offset)); - batch_copy[copy_nb].src = + batch_copy[vq->batch_copy_nb_elems].dst = + (void *)((uintptr_t)(buf_addr + buf_offset)); + batch_copy[vq->batch_copy_nb_elems].src = rte_pktmbuf_mtod_offset(m, void *, mbuf_offset); - batch_copy[copy_nb].log_addr = - buf_vec[vec_idx].buf_addr + desc_offset; - batch_copy[copy_nb].len = cpy_len; - copy_nb++; + batch_copy[vq->batch_copy_nb_elems].log_addr = + buf_iova + buf_offset; + batch_copy[vq->batch_copy_nb_elems].len = cpy_len; + vq->batch_copy_nb_elems++; } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; + buf_avail -= cpy_len; + buf_offset += cpy_len; } out: - vq->batch_copy_nb_elems = copy_nb; return error; } static __rte_always_inline uint32_t -virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, +virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mbuf **pkts, uint32_t count) { - struct vhost_virtqueue *vq; uint32_t pkt_idx = 0; uint16_t num_buffers; struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t avail_head; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; - } + 
rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + avail_head = *((volatile uint16_t *)&vq->avail->idx); - vq = dev->virtqueue[queue_id]; + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + uint16_t nr_vec = 0; - rte_spinlock_lock(&vq->access_lock); + if (unlikely(reserve_avail_buf_split(dev, vq, + pkt_len, buf_vec, &num_buffers, + avail_head, &nr_vec) < 0)) { + VHOST_LOG_DEBUG(VHOST_DATA, + "(%d) failed to get enough desc from vring\n", + dev->vid); + vq->shadow_used_idx -= num_buffers; + break; + } - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + dev->vid, vq->last_avail_idx, + vq->last_avail_idx + num_buffers); - if (unlikely(vq->access_ok == 0)) - if (unlikely(vring_translate(dev, vq) < 0)) - goto out; + if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], + buf_vec, nr_vec, + num_buffers) < 0) { + vq->shadow_used_idx -= num_buffers; + break; + } - count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); - if (count == 0) - goto out; + vq->last_avail_idx += num_buffers; + } - vq->batch_copy_nb_elems = 0; + do_data_copy_enqueue(dev, vq); - rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } + + return pkt_idx; +} + +static __rte_always_inline uint32_t +virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf **pkts, uint32_t count) +{ + uint32_t pkt_idx = 0; + uint16_t num_buffers; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; - vq->shadow_used_idx = 0; - avail_head = *((volatile uint16_t *)&vq->avail->idx); for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; + uint16_t nr_vec = 0; + uint16_t nr_descs = 0; - if (unlikely(reserve_avail_buf_mergeable(dev, vq, - pkt_len, buf_vec, &num_buffers, - avail_head) < 0)) { - LOG_DEBUG(VHOST_DATA, + if (unlikely(reserve_avail_buf_packed(dev, vq, + pkt_len, buf_vec, &nr_vec, + &num_buffers, &nr_descs) < 0)) { + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx -= num_buffers; break; } - LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); - if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx], - buf_vec, num_buffers) < 0) { + if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx], + buf_vec, nr_vec, + num_buffers) < 0) { vq->shadow_used_idx -= num_buffers; break; } - vq->last_avail_idx += num_buffers; + vq->last_avail_idx += nr_descs; + if (vq->last_avail_idx >= vq->size) { + vq->last_avail_idx -= vq->size; + vq->avail_wrap_counter ^= 1; + } } do_data_copy_enqueue(dev, vq); if (likely(vq->shadow_used_idx)) { - flush_shadow_used_ring(dev, vq); - vhost_vring_call(dev, vq); + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } + return pkt_idx; +} + +static __rte_always_inline uint32_t +virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", 
dev->vid, __func__); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + + rte_spinlock_lock(&vq->access_lock); + + if (unlikely(vq->enabled == 0)) + goto out_access_unlock; + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 0)) + goto out; + + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); + if (count == 0) + goto out; + + if (vq_is_packed(dev)) + count = virtio_dev_rx_packed(dev, vq, pkts, count); + else + count = virtio_dev_rx_split(dev, vq, pkts, count); + out: if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) vhost_user_iotlb_rd_unlock(vq); @@ -691,7 +926,7 @@ out: out_access_unlock: rte_spinlock_unlock(&vq->access_lock); - return pkt_idx; + return count; } uint16_t @@ -710,10 +945,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id, return 0; } - if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) - return virtio_dev_merge_rx(dev, queue_id, pkts, count); - else - return virtio_dev_rx(dev, queue_id, pkts, count); + return virtio_dev_rx(dev, queue_id, pkts, count); } static inline bool @@ -838,42 +1070,62 @@ put_zmbuf(struct zcopy_mbuf *zmbuf) static __rte_always_inline int copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, - struct vring_desc *descs, uint16_t max_desc, - struct rte_mbuf *m, uint16_t desc_idx, - struct rte_mempool *mbuf_pool) + struct buf_vector *buf_vec, uint16_t nr_vec, + struct rte_mbuf *m, struct rte_mempool *mbuf_pool) { - struct vring_desc *desc; - uint64_t desc_addr; - uint32_t desc_avail, desc_offset; + uint32_t buf_avail, buf_offset; + uint64_t buf_addr, buf_iova, buf_len; uint32_t mbuf_avail, mbuf_offset; uint32_t cpy_len; struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr tmp_hdr; struct virtio_net_hdr *hdr = NULL; /* A counter to avoid desc dead loop chain */ - uint32_t nr_desc = 1; + uint16_t vec_idx = 0; struct batch_copy_elem *batch_copy = vq->batch_copy_elems; - uint16_t copy_nb = vq->batch_copy_nb_elems; int error = 0; - desc = &descs[desc_idx]; - if (unlikely((desc->len < dev->vhost_hlen)) || - (desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; - goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { + if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { error = -1; goto out; } + if (likely(nr_vec > 1)) + rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr); + if (virtio_net_with_host_offload(dev)) { - hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); - rte_prefetch0(hdr); + if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { + uint64_t len; + uint64_t remain = sizeof(struct virtio_net_hdr); + uint64_t src; + uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr; + uint16_t hdr_vec_idx = 0; + + /* + * No luck, the virtio-net header doesn't fit + * in a contiguous virtual area. 
+ */ + while (remain) { + len = RTE_MIN(remain, + buf_vec[hdr_vec_idx].buf_len); + src = buf_vec[hdr_vec_idx].buf_addr; + rte_memcpy((void *)(uintptr_t)dst, + (void *)(uintptr_t)src, len); + + remain -= len; + dst += len; + hdr_vec_idx++; + } + + hdr = &tmp_hdr; + } else { + hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); + rte_prefetch0(hdr); + } } /* @@ -881,41 +1133,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, * for Tx: the first for storing the header, and others * for storing the data. */ - if (likely((desc->len == dev->vhost_hlen) && - (desc->flags & VRING_DESC_F_NEXT) != 0)) { - desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; - goto out; - } - - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { - error = -1; + if (unlikely(buf_len < dev->vhost_hlen)) { + buf_offset = dev->vhost_hlen - buf_len; + vec_idx++; + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; + buf_avail = buf_len - buf_offset; + } else if (buf_len == dev->vhost_hlen) { + if (unlikely(++vec_idx >= nr_vec)) goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - desc_offset = 0; - desc_avail = desc->len; - nr_desc += 1; + buf_offset = 0; + buf_avail = buf_len; } else { - desc_avail = desc->len - dev->vhost_hlen; - desc_offset = dev->vhost_hlen; + buf_offset = dev->vhost_hlen; + buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; } - rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); + rte_prefetch0((void *)(uintptr_t) + (buf_addr + buf_offset)); - PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0); + PRINT_PACKET(dev, + (uintptr_t)(buf_addr + buf_offset), + (uint32_t)buf_avail, 0); mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; while (1) { uint64_t hpa; - cpy_len = RTE_MIN(desc_avail, mbuf_avail); + cpy_len = RTE_MIN(buf_avail, mbuf_avail); /* * A desc buf might across two host physical pages that are @@ -923,11 +1174,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, * will be copied even though zero copy is enabled. 
*/ if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev, - desc->addr + desc_offset, cpy_len)))) { + buf_iova + buf_offset, cpy_len)))) { cur->data_len = cpy_len; cur->data_off = 0; - cur->buf_addr = (void *)(uintptr_t)(desc_addr - + desc_offset); + cur->buf_addr = + (void *)(uintptr_t)(buf_addr + buf_offset); cur->buf_iova = hpa; /* @@ -937,61 +1188,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, mbuf_avail = cpy_len; } else { if (likely(cpy_len > MAX_BATCH_LEN || - copy_nb >= vq->size || + vq->batch_copy_nb_elems >= vq->size || (hdr && cur == m))) { rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), - (void *)((uintptr_t)(desc_addr + - desc_offset)), + (void *)((uintptr_t)(buf_addr + + buf_offset)), cpy_len); } else { - batch_copy[copy_nb].dst = + batch_copy[vq->batch_copy_nb_elems].dst = rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset); - batch_copy[copy_nb].src = - (void *)((uintptr_t)(desc_addr + - desc_offset)); - batch_copy[copy_nb].len = cpy_len; - copy_nb++; + batch_copy[vq->batch_copy_nb_elems].src = + (void *)((uintptr_t)(buf_addr + + buf_offset)); + batch_copy[vq->batch_copy_nb_elems].len = + cpy_len; + vq->batch_copy_nb_elems++; } } mbuf_avail -= cpy_len; mbuf_offset += cpy_len; - desc_avail -= cpy_len; - desc_offset += cpy_len; + buf_avail -= cpy_len; + buf_offset += cpy_len; - /* This desc reaches to its end, get the next one */ - if (desc_avail == 0) { - if ((desc->flags & VRING_DESC_F_NEXT) == 0) + /* This buf reaches to its end, get the next one */ + if (buf_avail == 0) { + if (++vec_idx >= nr_vec) break; - if (unlikely(desc->next >= max_desc || - ++nr_desc > max_desc)) { - error = -1; - goto out; - } - desc = &descs[desc->next]; - if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) { - error = -1; - goto out; - } - - desc_addr = vhost_iova_to_vva(dev, - vq, desc->addr, - desc->len, - VHOST_ACCESS_RO); - if (unlikely(!desc_addr)) { - error = -1; - goto out; - } + buf_addr = buf_vec[vec_idx].buf_addr; + buf_iova = buf_vec[vec_idx].buf_iova; + buf_len = buf_vec[vec_idx].buf_len; - rte_prefetch0((void *)(uintptr_t)desc_addr); + /* + * Prefecth desc n + 1 buffer while + * desc n buffer is processed. 
+ */ + if (vec_idx + 1 < nr_vec) + rte_prefetch0((void *)(uintptr_t) + buf_vec[vec_idx + 1].buf_addr); - desc_offset = 0; - desc_avail = desc->len; + buf_offset = 0; + buf_avail = buf_len; - PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + PRINT_PACKET(dev, (uintptr_t)buf_addr, + (uint32_t)buf_avail, 0); } /* @@ -1027,38 +1270,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, vhost_dequeue_offload(hdr, m); out: - vq->batch_copy_nb_elems = copy_nb; return error; } -static __rte_always_inline void -update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t used_idx, uint32_t desc_idx) -{ - vq->used->ring[used_idx].id = desc_idx; - vq->used->ring[used_idx].len = 0; - vhost_log_used_vring(dev, vq, - offsetof(struct vring_used, ring[used_idx]), - sizeof(vq->used->ring[used_idx])); -} - -static __rte_always_inline void -update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, - uint32_t count) -{ - if (unlikely(count == 0)) - return; - - rte_smp_wmb(); - rte_smp_rmb(); - - vq->used->idx += count; - vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), - sizeof(vq->used->idx)); - vhost_vring_call(dev, vq); -} - static __rte_always_inline struct zcopy_mbuf * get_zmbuf(struct vhost_virtqueue *vq) { @@ -1118,66 +1333,137 @@ restore_mbuf(struct rte_mbuf *m) } } -uint16_t -rte_vhost_dequeue_burst(int vid, uint16_t queue_id, +static __rte_always_inline uint16_t +virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) { - struct virtio_net *dev; - struct rte_mbuf *rarp_mbuf = NULL; - struct vhost_virtqueue *vq; - uint32_t desc_indexes[MAX_PKT_BURST]; - uint32_t used_idx; - uint32_t i = 0; + uint16_t i; uint16_t free_entries; - uint16_t avail_idx; - dev = get_device(vid); - if (!dev) - return 0; + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf, *next; - if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { - RTE_LOG(ERR, VHOST_DATA, - "(%d) %s: built-in vhost net backend is disabled.\n", - dev->vid, __func__); - return 0; - } + for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); + zmbuf != NULL; zmbuf = next) { + next = TAILQ_NEXT(zmbuf, next); - if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { - RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", - dev->vid, __func__, queue_id); - return 0; + if (mbuf_is_consumed(zmbuf->mbuf)) { + update_shadow_used_ring_split(vq, + zmbuf->desc_idx, 0); + TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); + restore_mbuf(zmbuf->mbuf); + rte_pktmbuf_free(zmbuf->mbuf); + put_zmbuf(zmbuf); + vq->nr_zmbuf -= 1; + } + } + + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); } - vq = dev->virtqueue[queue_id]; + rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); - if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + free_entries = *((volatile uint16_t *)&vq->avail->idx) - + vq->last_avail_idx; + if (free_entries == 0) return 0; - if (unlikely(vq->enabled == 0)) - goto out_access_unlock; + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - vq->batch_copy_nb_elems = 0; + count = RTE_MIN(count, MAX_PKT_BURST); + count = RTE_MIN(count, free_entries); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + dev->vid, count); - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) - vhost_user_iotlb_rd_lock(vq); + for (i = 0; i < count; i++) { + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t 
head_idx, dummy_len; + uint16_t nr_vec = 0; + int err; - if (unlikely(vq->access_ok == 0)) - if (unlikely(vring_translate(dev, vq) < 0)) - goto out; + if (unlikely(fill_vec_buf_split(dev, vq, + vq->last_avail_idx + i, + &nr_vec, buf_vec, + &head_idx, &dummy_len, + VHOST_ACCESS_RO) < 0)) + break; + + if (likely(dev->dequeue_zero_copy == 0)) + update_shadow_used_ring_split(vq, head_idx, 0); + + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + mbuf_pool); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); + break; + } + + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf; + + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(pkts[i]); + break; + } + zmbuf->mbuf = pkts[i]; + zmbuf->desc_idx = head_idx; + + /* + * Pin lock the mbuf; we will check later to see + * whether the mbuf is freed (when we are the last + * user) or not. If that's the case, we then could + * update the used ring safely. + */ + rte_mbuf_refcnt_update(pkts[i], 1); + + vq->nr_zmbuf += 1; + TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); + } + } + vq->last_avail_idx += i; + + if (likely(dev->dequeue_zero_copy == 0)) { + do_data_copy_dequeue(vq); + if (unlikely(i < count)) + vq->shadow_used_idx = i; + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } + + return i; +} + +static __rte_always_inline uint16_t +virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + uint16_t i; + + rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]); if (unlikely(dev->dequeue_zero_copy)) { struct zcopy_mbuf *zmbuf, *next; - int nr_updated = 0; for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list); zmbuf != NULL; zmbuf = next) { next = TAILQ_NEXT(zmbuf, next); if (mbuf_is_consumed(zmbuf->mbuf)) { - used_idx = vq->last_used_idx++ & (vq->size - 1); - update_used_ring(dev, vq, used_idx, - zmbuf->desc_idx); - nr_updated += 1; + update_shadow_used_ring_packed(vq, + zmbuf->desc_idx, + 0, + zmbuf->desc_count); TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next); restore_mbuf(zmbuf->mbuf); @@ -1187,93 +1473,34 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, } } - update_used_idx(dev, vq, nr_updated); + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); } - /* - * Construct a RARP broadcast packet, and inject it to the "pkts" - * array, to looks like that guest actually send such packet. - * - * Check user_send_rarp() for more information. - * - * broadcast_rarp shares a cacheline in the virtio_net structure - * with some fields that are accessed during enqueue and - * rte_atomic16_cmpset() causes a write if using cmpxchg. This could - * result in false sharing between enqueue and dequeue. - * - * Prevent unnecessary false sharing by reading broadcast_rarp first - * and only performing cmpset if the read indicates it is likely to - * be set. 
- */ - - if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && - rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { - - rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); - if (rarp_mbuf == NULL) { - RTE_LOG(ERR, VHOST_DATA, - "Failed to make RARP packet.\n"); - return 0; - } - count -= 1; - } - - free_entries = *((volatile uint16_t *)&vq->avail->idx) - - vq->last_avail_idx; - if (free_entries == 0) - goto out; - - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); - - /* Prefetch available and used ring */ - avail_idx = vq->last_avail_idx & (vq->size - 1); - used_idx = vq->last_used_idx & (vq->size - 1); - rte_prefetch0(&vq->avail->ring[avail_idx]); - rte_prefetch0(&vq->used->ring[used_idx]); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); count = RTE_MIN(count, MAX_PKT_BURST); - count = RTE_MIN(count, free_entries); - LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", dev->vid, count); - /* Retrieve all of the head indexes first to avoid caching issues. */ - for (i = 0; i < count; i++) { - avail_idx = (vq->last_avail_idx + i) & (vq->size - 1); - used_idx = (vq->last_used_idx + i) & (vq->size - 1); - desc_indexes[i] = vq->avail->ring[avail_idx]; - - if (likely(dev->dequeue_zero_copy == 0)) - update_used_ring(dev, vq, used_idx, desc_indexes[i]); - } - - /* Prefetch descriptor index. */ - rte_prefetch0(&vq->desc[desc_indexes[0]]); for (i = 0; i < count; i++) { - struct vring_desc *desc; - uint16_t sz, idx; + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint16_t buf_id, dummy_len; + uint16_t desc_count, nr_vec = 0; int err; - if (likely(i + 1 < count)) - rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); + if (unlikely(fill_vec_buf_packed(dev, vq, + vq->last_avail_idx, &desc_count, + buf_vec, &nr_vec, + &buf_id, &dummy_len, + VHOST_ACCESS_RW) < 0)) + break; - if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) { - desc = (struct vring_desc *)(uintptr_t) - vhost_iova_to_vva(dev, vq, - vq->desc[desc_indexes[i]].addr, - sizeof(*desc), - VHOST_ACCESS_RO); - if (unlikely(!desc)) - break; + if (likely(dev->dequeue_zero_copy == 0)) + update_shadow_used_ring_packed(vq, buf_id, 0, + desc_count); - rte_prefetch0(desc); - sz = vq->desc[desc_indexes[i]].len / sizeof(*desc); - idx = 0; - } else { - desc = vq->desc; - sz = vq->size; - idx = desc_indexes[i]; - } + rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr); pkts[i] = rte_pktmbuf_alloc(mbuf_pool); if (unlikely(pkts[i] == NULL)) { @@ -1282,8 +1509,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, break; } - err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx, - mbuf_pool); + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + mbuf_pool); if (unlikely(err)) { rte_pktmbuf_free(pkts[i]); break; @@ -1298,7 +1525,8 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, break; } zmbuf->mbuf = pkts[i]; - zmbuf->desc_idx = desc_indexes[i]; + zmbuf->desc_idx = buf_id; + zmbuf->desc_count = desc_count; /* * Pin lock the mbuf; we will check later to see @@ -1311,15 +1539,103 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, vq->nr_zmbuf += 1; TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next); } + + vq->last_avail_idx += desc_count; + if (vq->last_avail_idx >= vq->size) { + vq->last_avail_idx -= vq->size; + vq->avail_wrap_counter ^= 1; + } } - vq->last_avail_idx += i; if (likely(dev->dequeue_zero_copy == 0)) { do_data_copy_dequeue(vq); - vq->last_used_idx += i; - update_used_idx(dev, vq, i); + if 
(unlikely(i < count)) + vq->shadow_used_idx = i; + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); + } + + return i; +} + +uint16_t +rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + struct virtio_net *dev; + struct rte_mbuf *rarp_mbuf = NULL; + struct vhost_virtqueue *vq; + + dev = get_device(vid); + if (!dev) + return 0; + + if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) { + RTE_LOG(ERR, VHOST_DATA, + "(%d) %s: built-in vhost net backend is disabled.\n", + dev->vid, __func__); + return 0; + } + + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) { + RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", + dev->vid, __func__, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + + if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0)) + return 0; + + if (unlikely(vq->enabled == 0)) { + count = 0; + goto out_access_unlock; + } + + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + vhost_user_iotlb_rd_lock(vq); + + if (unlikely(vq->access_ok == 0)) + if (unlikely(vring_translate(dev, vq) < 0)) { + count = 0; + goto out; + } + + /* + * Construct a RARP broadcast packet, and inject it to the "pkts" + * array, to looks like that guest actually send such packet. + * + * Check user_send_rarp() for more information. + * + * broadcast_rarp shares a cacheline in the virtio_net structure + * with some fields that are accessed during enqueue and + * rte_atomic16_cmpset() causes a write if using cmpxchg. This could + * result in false sharing between enqueue and dequeue. + * + * Prevent unnecessary false sharing by reading broadcast_rarp first + * and only performing cmpset if the read indicates it is likely to + * be set. + */ + if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && + rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + + rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac); + if (rarp_mbuf == NULL) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to make RARP packet.\n"); + count = 0; + goto out; + } + count -= 1; } + if (vq_is_packed(dev)) + count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count); + else + count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count); + out: if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) vhost_user_iotlb_rd_unlock(vq); @@ -1332,10 +1648,10 @@ out_access_unlock: * Inject it to the head of "pkts" array, so that switch's mac * learning table will get updated first. */ - memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *)); + memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *)); pkts[0] = rarp_mbuf; - i += 1; + count += 1; } - return i; + return count; } |
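
The packed-ring enqueue and dequeue paths introduced above (virtio_dev_rx_packed() and virtio_dev_tx_packed()) end each iteration with the same bookkeeping: advance vq->last_avail_idx by the number of descriptors consumed and flip avail_wrap_counter whenever the index wraps past the ring size; whether a descriptor is available is then judged from its AVAIL/USED flag bits relative to that counter. The standalone C sketch below illustrates only that mechanism. The types and constants (struct ring_pos, RING_SIZE, advance()) are simplified stand-ins rather than the library's own; the flag bit positions follow the virtio 1.1 packed-ring layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE	256u		/* stand-in for the negotiated queue size */
#define DESC_F_AVAIL	(1u << 7)	/* virtio 1.1 VIRTQ_DESC_F_AVAIL */
#define DESC_F_USED	(1u << 15)	/* virtio 1.1 VIRTQ_DESC_F_USED */

struct ring_pos {			/* simplified stand-in for the vq fields */
	uint16_t last_avail_idx;
	bool avail_wrap_counter;
};

/* A packed descriptor is available when its AVAIL bit equals the consumer's
 * wrap counter and its USED bit differs from it. */
static bool
desc_is_avail(uint16_t flags, bool wrap)
{
	bool avail = (flags & DESC_F_AVAIL) != 0;
	bool used = (flags & DESC_F_USED) != 0;

	return avail == wrap && used != wrap;
}

/* Mirror the "last_avail_idx += nr_descs; wrap and flip the counter"
 * pattern used by the packed rx/tx paths in the diff. */
static void
advance(struct ring_pos *pos, uint16_t nr_descs)
{
	pos->last_avail_idx += nr_descs;
	if (pos->last_avail_idx >= RING_SIZE) {
		pos->last_avail_idx -= RING_SIZE;
		pos->avail_wrap_counter = !pos->avail_wrap_counter;
	}
}

int
main(void)
{
	struct ring_pos pos = { .last_avail_idx = 250, .avail_wrap_counter = true };

	advance(&pos, 10);		/* crosses the end of the ring */
	printf("idx=%u wrap=%d avail?=%d\n",
		(unsigned)pos.last_avail_idx,
		(int)pos.avail_wrap_counter,
		(int)desc_is_avail(DESC_F_USED, pos.avail_wrap_counter));
	return 0;
}

The wrap counter is needed because a packed ring has no separate available and used rings with free-running indexes: producer and consumer chase each other around a single descriptor array, so the flag bits can only be interpreted relative to which lap each side is on.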
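
copy_desc_to_mbuf() above also handles the case where the virtio-net header does not fit in the first descriptor buffer: it gathers the header segment by segment from successive buf_vec entries into a temporary tmp_hdr before using it. The short sketch below shows the same scatter-gather pattern with hypothetical stand-in types and names (struct seg, gather_header()); unlike the code in the diff it also bounds-checks the segment index, which is a simplification choice here rather than the library's behaviour.

#include <stdint.h>
#include <string.h>

struct seg {				/* stand-in for one buf_vector entry */
	void *addr;
	uint64_t len;
};

/* Copy 'need' bytes spread across a chain of segments into the contiguous
 * buffer 'dst'.  Returns 0 on success, -1 if the chain is too short. */
int
gather_header(void *dst, uint64_t need, const struct seg *segs, int nr_segs)
{
	uint8_t *out = dst;
	int i = 0;

	while (need > 0) {
		uint64_t len;

		if (i >= nr_segs)
			return -1;
		len = segs[i].len < need ? segs[i].len : need;
		memcpy(out, segs[i].addr, len);
		out += len;
		need -= len;
		i++;
	}
	return 0;
}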
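
At the API surface none of this ring-layout detail is visible: rte_vhost_enqueue_burst() and rte_vhost_dequeue_burst() now dispatch internally to the split or packed variants via vq_is_packed(). A minimal, hypothetical polling loop over the two calls might look as follows; forward_loop(), BURST_SZ and the stop flag are illustrative assumptions, error handling and EAL/vhost setup are omitted, and the even/odd queue numbering matches the is_valid_virt_queue_idx() checks in the enqueue (is_tx = 0) and dequeue (is_tx = 1) paths above.

#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_vhost.h>

#define BURST_SZ 32			/* illustrative burst size */

/* Hypothetical loopback worker: drain what the guest transmits on
 * virtqueue 1 and push it straight back on virtqueue 0. */
void
forward_loop(int vid, struct rte_mempool *mbuf_pool, volatile int *stop)
{
	struct rte_mbuf *pkts[BURST_SZ];
	uint16_t nb_rx, i;

	while (!*stop) {
		nb_rx = rte_vhost_dequeue_burst(vid, 1, mbuf_pool,
						pkts, BURST_SZ);
		if (nb_rx == 0)
			continue;

		rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);

		/* The enqueue path copies data into the guest's buffers, so
		 * the mbufs remain owned by the caller and are freed here
		 * whether or not they were all accepted. */
		for (i = 0; i < nb_rx; i++)
			rte_pktmbuf_free(pkts[i]);
	}
}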