author    | Luca Boccassi <luca.boccassi@gmail.com> | 2018-11-01 11:59:50 +0000
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-11-01 12:00:19 +0000
commit    | 8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 (patch)
tree      | 208e3bc33c220854d89d010e3abf720a2e62e546 /lib
parent    | b63264c8342e6a1b6971c79550d2af2024b6a4de (diff)
New upstream version 18.11-rc1 (upstream/18.11-rc1)
Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
200 files changed, 17096 insertions, 2700 deletions
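One recurring change in the diff below is the replacement of bare `__attribute__ ((weak))` qualifiers with the `__rte_weak` macro (see the `rte_acl.c` and `bpf_load.c` hunks). A minimal sketch of the weak-symbol fallback pattern, assuming `__rte_weak` expands to GCC's weak attribute as it does in `rte_common.h`; the macro is defined locally here only for illustration:

```c
/* Sketch of the weak-symbol fallback pattern used by the ACL classify
 * routines: a weak generic stub plus an optional strong override that
 * replaces it at link time. Assumes a GCC-compatible toolchain. */
#include <stdio.h>

#define __rte_weak __attribute__((__weak__))

/* Weak fallback: used only when no strong definition is linked in. */
__rte_weak int
classify_avx2(const unsigned char *data)
{
	(void)data;
	return -1; /* stands in for -ENOTSUP in the real code */
}

#ifdef USE_AVX2
/* Strong definition (e.g. built only when AVX2 is available); it
 * silently takes precedence over the weak stub above. */
int
classify_avx2(const unsigned char *data)
{
	return data[0] & 0x7f; /* placeholder for the vectorised path */
}
#endif

int
main(void)
{
	unsigned char buf[1] = { 0x42 };
	printf("classify_avx2() -> %d\n", classify_avx2(buf));
	return 0;
}
```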
diff --git a/lib/Makefile b/lib/Makefile index afa604e2..b7370ef9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,6 +25,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ethdev DEPDIRS-librte_ethdev := librte_net librte_eal librte_mempool librte_ring DEPDIRS-librte_ethdev += librte_mbuf DEPDIRS-librte_ethdev += librte_kvargs +DEPDIRS-librte_ethdev += librte_cmdline DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += librte_bbdev DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev @@ -50,7 +51,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd DEPDIRS-librte_efd := librte_eal librte_ring librte_hash DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm -DEPDIRS-librte_lpm := librte_eal +DEPDIRS-librte_lpm := librte_eal librte_hash DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DEPDIRS-librte_acl := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member @@ -71,7 +72,7 @@ DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ethdev DIRS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += librte_latencystats DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ethdev librte_mbuf DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power -DEPDIRS-librte_power := librte_eal +DEPDIRS-librte_power := librte_eal librte_timer DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DEPDIRS-librte_meter := librte_eal DIRS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += librte_flow_classify @@ -105,6 +106,8 @@ DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net DEPDIRS-librte_gso += librte_mempool DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev +DIRS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += librte_telemetry +DEPDIRS-librte_telemetry := librte_eal librte_metrics librte_ethdev ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c index 2f1243cd..db7d3221 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -16,7 +16,7 @@ EAL_REGISTER_TAILQ(rte_acl_tailq) * If the compiler doesn't support AVX2 instructions, * then the dummy one would be used instead for AVX2 classify method. 
*/ -int __attribute__ ((weak)) +__rte_weak int rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, __rte_unused const uint8_t **data, __rte_unused uint32_t *results, @@ -26,7 +26,7 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } -int __attribute__ ((weak)) +__rte_weak int rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, __rte_unused const uint8_t **data, __rte_unused uint32_t *results, @@ -36,7 +36,7 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } -int __attribute__ ((weak)) +__rte_weak int rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, __rte_unused const uint8_t **data, __rte_unused uint32_t *results, @@ -46,7 +46,7 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, return -ENOTSUP; } -int __attribute__ ((weak)) +__rte_weak int rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx, __rte_unused const uint8_t **data, __rte_unused uint32_t *results, diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h index 34c3b9c6..aa22e70c 100644 --- a/lib/librte_acl/rte_acl.h +++ b/lib/librte_acl/rte_acl.h @@ -88,7 +88,7 @@ enum { RTE_ACL_TYPE_SHIFT = 29, RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t), RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX, - RTE_ACL_MIN_PRIORITY = 0, + RTE_ACL_MIN_PRIORITY = 1, }; #define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c index 2b84fe72..d9d163b7 100644 --- a/lib/librte_bpf/bpf_load.c +++ b/lib/librte_bpf/bpf_load.c @@ -131,7 +131,7 @@ rte_bpf_load(const struct rte_bpf_prm *prm) return bpf; } -__rte_experimental __attribute__ ((weak)) struct rte_bpf * +__rte_experimental __rte_weak struct rte_bpf * rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname, const char *sname) { diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h index 31731e7a..11d09cdc 100644 --- a/lib/librte_bpf/rte_bpf_ethdev.h +++ b/lib/librte_bpf/rte_bpf_ethdev.h @@ -75,7 +75,7 @@ rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue); * @param prm * Parameters used to create and initialise the BPF exeution context. * @param flags - * Flags that define expected expected behavior of the loaded filter + * Flags that define expected behavior of the loaded filter * (i.e. jited/non-jited version to use). * @return * Zero on successful completion or negative error code otherwise. diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile index ddae1cfd..c64142b8 100644 --- a/lib/librte_cmdline/Makefile +++ b/lib/librte_cmdline/Makefile @@ -25,7 +25,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_vt100.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c -CFLAGS += -D_GNU_SOURCE LDLIBS += -lrte_eal # install includes diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c index 591b78b0..d9042f04 100644 --- a/lib/librte_cmdline/cmdline.c +++ b/lib/librte_cmdline/cmdline.c @@ -126,35 +126,11 @@ cmdline_printf(const struct cmdline *cl, const char *fmt, ...) 
if (!cl || !fmt) return; -#ifdef _GNU_SOURCE if (cl->s_out < 0) return; va_start(ap, fmt); vdprintf(cl->s_out, fmt, ap); va_end(ap); -#else - int ret; - char *buf; - - if (cl->s_out < 0) - return; - - buf = malloc(BUFSIZ); - if (buf == NULL) - return; - va_start(ap, fmt); - ret = vsnprintf(buf, BUFSIZ, fmt, ap); - va_end(ap); - if (ret < 0) { - free(buf); - return; - } - if (ret >= BUFSIZ) - ret = BUFSIZ - 1; - ret = write(cl->s_out, buf, ret); - (void)ret; - free(buf); -#endif } int diff --git a/lib/librte_cmdline/meson.build b/lib/librte_cmdline/meson.build index 5741817a..30498906 100644 --- a/lib/librte_cmdline/meson.build +++ b/lib/librte_cmdline/meson.build @@ -1,6 +1,10 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +# This library is processed before EAL +includes = [global_inc] +includes += include_directories('../librte_eal/common/include') + version = 2 sources = files('cmdline.c', 'cmdline_cirbuf.c', diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c index 98ad0cfd..c663be59 100644 --- a/lib/librte_compressdev/rte_comp.c +++ b/lib/librte_compressdev/rte_comp.c @@ -83,8 +83,8 @@ struct rte_comp_op_pool_private { * @param nb_ops * Number of operations to allocate * @return - * - 0: Success - * - -ENOENT: Not enough entries in the mempool; no ops are retrieved. + * - nb_ops: Success, the nb_ops requested was allocated + * - 0: Not enough entries in the mempool; no ops are retrieved. */ static inline int rte_comp_op_raw_bulk_alloc(struct rte_mempool *mempool, diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h index ee9056ea..395ce29f 100644 --- a/lib/librte_compressdev/rte_comp.h +++ b/lib/librte_compressdev/rte_comp.h @@ -448,8 +448,8 @@ rte_comp_op_alloc(struct rte_mempool *mempool); * @param nb_ops * Number of operations to allocate * @return - * - 0: Success - * - -ENOENT: Not enough entries in the mempool; no ops are retrieved. + * - nb_ops: Success, the nb_ops requested was allocated + * - 0: Not enough entries in the mempool; no ops are retrieved. 
*/ int __rte_experimental rte_comp_op_bulk_alloc(struct rte_mempool *mempool, diff --git a/lib/librte_compressdev/rte_compressdev.c b/lib/librte_compressdev/rte_compressdev.c index 9091dd6e..10101ebb 100644 --- a/lib/librte_compressdev/rte_compressdev.c +++ b/lib/librte_compressdev/rte_compressdev.c @@ -18,19 +18,15 @@ #define RTE_COMPRESSDEV_DETACHED (0) #define RTE_COMPRESSDEV_ATTACHED (1) -struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS]; - -struct rte_compressdev *rte_compressdevs = &rte_comp_devices[0]; +static struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS]; static struct rte_compressdev_global compressdev_globals = { - .devs = &rte_comp_devices[0], + .devs = rte_comp_devices, .data = { NULL }, .nb_devs = 0, .max_devs = RTE_COMPRESS_MAX_DEVS }; -struct rte_compressdev_global *rte_compressdev_globals = &compressdev_globals; - const struct rte_compressdev_capabilities * __rte_experimental rte_compressdev_capability_get(uint8_t dev_id, enum rte_comp_algorithm algo) @@ -78,7 +74,7 @@ rte_compressdev_get_feature_name(uint64_t flag) static struct rte_compressdev * rte_compressdev_get_dev(uint8_t dev_id) { - return &rte_compressdev_globals->devs[dev_id]; + return &compressdev_globals.devs[dev_id]; } struct rte_compressdev * __rte_experimental @@ -90,8 +86,8 @@ rte_compressdev_pmd_get_named_dev(const char *name) if (name == NULL) return NULL; - for (i = 0; i < rte_compressdev_globals->max_devs; i++) { - dev = &rte_compressdev_globals->devs[i]; + for (i = 0; i < compressdev_globals.max_devs; i++) { + dev = &compressdev_globals.devs[i]; if ((dev->attached == RTE_COMPRESSDEV_ATTACHED) && (strcmp(dev->data->name, name) == 0)) @@ -106,7 +102,7 @@ rte_compressdev_is_valid_dev(uint8_t dev_id) { struct rte_compressdev *dev = NULL; - if (dev_id >= rte_compressdev_globals->nb_devs) + if (dev_id >= compressdev_globals.nb_devs) return 0; dev = rte_compressdev_get_dev(dev_id); @@ -125,10 +121,10 @@ rte_compressdev_get_dev_id(const char *name) if (name == NULL) return -1; - for (i = 0; i < rte_compressdev_globals->nb_devs; i++) - if ((strcmp(rte_compressdev_globals->devs[i].data->name, name) + for (i = 0; i < compressdev_globals.nb_devs; i++) + if ((strcmp(compressdev_globals.devs[i].data->name, name) == 0) && - (rte_compressdev_globals->devs[i].attached == + (compressdev_globals.devs[i].attached == RTE_COMPRESSDEV_ATTACHED)) return i; @@ -138,7 +134,7 @@ rte_compressdev_get_dev_id(const char *name) uint8_t __rte_experimental rte_compressdev_count(void) { - return rte_compressdev_globals->nb_devs; + return compressdev_globals.nb_devs; } uint8_t __rte_experimental @@ -146,8 +142,8 @@ rte_compressdev_devices_get(const char *driver_name, uint8_t *devices, uint8_t nb_devices) { uint8_t i, count = 0; - struct rte_compressdev *devs = rte_compressdev_globals->devs; - uint8_t max_devs = rte_compressdev_globals->max_devs; + struct rte_compressdev *devs = compressdev_globals.devs; + uint8_t max_devs = compressdev_globals.max_devs; for (i = 0; i < max_devs && count < nb_devices; i++) { @@ -578,7 +574,7 @@ uint16_t __rte_experimental rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, struct rte_comp_op **ops, uint16_t nb_ops) { - struct rte_compressdev *dev = &rte_compressdevs[dev_id]; + struct rte_compressdev *dev = &rte_comp_devices[dev_id]; nb_ops = (*dev->dequeue_burst) (dev->data->queue_pairs[qp_id], ops, nb_ops); @@ -590,7 +586,7 @@ uint16_t __rte_experimental rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, struct rte_comp_op **ops, uint16_t nb_ops) { - 
struct rte_compressdev *dev = &rte_compressdevs[dev_id]; + struct rte_compressdev *dev = &rte_comp_devices[dev_id]; return (*dev->enqueue_burst)( dev->data->queue_pairs[qp_id], ops, nb_ops); diff --git a/lib/librte_compressdev/rte_compressdev_pmd.c b/lib/librte_compressdev/rte_compressdev_pmd.c index 7de4f339..95beb26a 100644 --- a/lib/librte_compressdev/rte_compressdev_pmd.c +++ b/lib/librte_compressdev/rte_compressdev_pmd.c @@ -92,24 +92,20 @@ rte_compressdev_pmd_create(const char *name, struct rte_compressdev *compressdev; if (params->name[0] != '\0') { - COMPRESSDEV_LOG(INFO, "[%s] User specified device name = %s\n", - device->driver->name, params->name); + COMPRESSDEV_LOG(INFO, "User specified device name = %s\n", + params->name); name = params->name; } - COMPRESSDEV_LOG(INFO, "[%s] - Creating compressdev %s\n", - device->driver->name, name); + COMPRESSDEV_LOG(INFO, "Creating compressdev %s\n", name); - COMPRESSDEV_LOG(INFO, - "[%s] - Init parameters - name: %s, socket id: %d", - device->driver->name, name, - params->socket_id); + COMPRESSDEV_LOG(INFO, "Init parameters - name: %s, socket id: %d", + name, params->socket_id); /* allocate device structure */ compressdev = rte_compressdev_pmd_allocate(name, params->socket_id); if (compressdev == NULL) { - COMPRESSDEV_LOG(ERR, "[%s] Failed to allocate comp device %s", - device->driver->name, name); + COMPRESSDEV_LOG(ERR, "Failed to allocate comp device %s", name); return NULL; } @@ -123,8 +119,8 @@ rte_compressdev_pmd_create(const char *name, if (compressdev->data->dev_private == NULL) { COMPRESSDEV_LOG(ERR, - "[%s] Cannot allocate memory for compressdev %s private data", - device->driver->name, name); + "Cannot allocate memory for compressdev" + " %s private data", name); rte_compressdev_pmd_release_device(compressdev); return NULL; @@ -141,8 +137,7 @@ rte_compressdev_pmd_destroy(struct rte_compressdev *compressdev) { int retval; - COMPRESSDEV_LOG(INFO, "[%s] Closing comp device %s", - compressdev->device->driver->name, + COMPRESSDEV_LOG(INFO, "Closing comp device %s", compressdev->device->name); /* free comp device */ diff --git a/lib/librte_compressdev/rte_compressdev_pmd.h b/lib/librte_compressdev/rte_compressdev_pmd.h index 38e9ea02..043353c9 100644 --- a/lib/librte_compressdev/rte_compressdev_pmd.h +++ b/lib/librte_compressdev/rte_compressdev_pmd.h @@ -51,11 +51,6 @@ struct rte_compressdev_global { uint8_t max_devs; /**< Max number of devices */ }; -/** Pointer to global array of comp devices */ -extern struct rte_compressdev *rte_compressdevs; -/** Pointer to global comp devices data structure */ -extern struct rte_compressdev_global *rte_compressdev_globals; - /** * Get the rte_compressdev structure device pointer for the named device. 
* diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile index c1148887..a8f94c09 100644 --- a/lib/librte_cryptodev/Makefile +++ b/lib/librte_cryptodev/Makefile @@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_cryptodev.a # library version -LIBABIVER := 4 +LIBABIVER := 5 # build flags CFLAGS += -O3 diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build index 295f509e..990dd3d4 100644 --- a/lib/librte_cryptodev/meson.build +++ b/lib/librte_cryptodev/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 4 +version = 5 sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c') headers = files('rte_cryptodev.h', 'rte_cryptodev_pmd.h', diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index 63ae23f0..a52eaaa4 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -43,19 +43,17 @@ static uint8_t nb_drivers; -struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS]; +static struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS]; -struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0]; +struct rte_cryptodev *rte_cryptodevs = rte_crypto_devices; static struct rte_cryptodev_global cryptodev_globals = { - .devs = &rte_crypto_devices[0], + .devs = rte_crypto_devices, .data = { NULL }, .nb_devs = 0, .max_devs = RTE_CRYPTO_MAX_DEVS }; -struct rte_cryptodev_global *rte_cryptodev_globals = &cryptodev_globals; - /* spinlock for crypto device callbacks */ static rte_spinlock_t rte_cryptodev_cb_lock = RTE_SPINLOCK_INITIALIZER; @@ -486,7 +484,7 @@ rte_cryptodev_get_feature_name(uint64_t flag) struct rte_cryptodev * rte_cryptodev_pmd_get_dev(uint8_t dev_id) { - return &rte_cryptodev_globals->devs[dev_id]; + return &cryptodev_globals.devs[dev_id]; } struct rte_cryptodev * @@ -498,8 +496,8 @@ rte_cryptodev_pmd_get_named_dev(const char *name) if (name == NULL) return NULL; - for (i = 0; i < rte_cryptodev_globals->max_devs; i++) { - dev = &rte_cryptodev_globals->devs[i]; + for (i = 0; i < cryptodev_globals.max_devs; i++) { + dev = &cryptodev_globals.devs[i]; if ((dev->attached == RTE_CRYPTODEV_ATTACHED) && (strcmp(dev->data->name, name) == 0)) @@ -514,7 +512,7 @@ rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id) { struct rte_cryptodev *dev = NULL; - if (dev_id >= rte_cryptodev_globals->nb_devs) + if (dev_id >= cryptodev_globals.nb_devs) return 0; dev = rte_cryptodev_pmd_get_dev(dev_id); @@ -533,10 +531,10 @@ rte_cryptodev_get_dev_id(const char *name) if (name == NULL) return -1; - for (i = 0; i < rte_cryptodev_globals->nb_devs; i++) - if ((strcmp(rte_cryptodev_globals->devs[i].data->name, name) + for (i = 0; i < cryptodev_globals.nb_devs; i++) + if ((strcmp(cryptodev_globals.devs[i].data->name, name) == 0) && - (rte_cryptodev_globals->devs[i].attached == + (cryptodev_globals.devs[i].attached == RTE_CRYPTODEV_ATTACHED)) return i; @@ -546,7 +544,7 @@ rte_cryptodev_get_dev_id(const char *name) uint8_t rte_cryptodev_count(void) { - return rte_cryptodev_globals->nb_devs; + return cryptodev_globals.nb_devs; } uint8_t @@ -554,9 +552,9 @@ rte_cryptodev_device_count_by_driver(uint8_t driver_id) { uint8_t i, dev_count = 0; - for (i = 0; i < rte_cryptodev_globals->max_devs; i++) - if (rte_cryptodev_globals->devs[i].driver_id == driver_id && - rte_cryptodev_globals->devs[i].attached == + for (i = 0; i < cryptodev_globals.max_devs; i++) + if (cryptodev_globals.devs[i].driver_id == driver_id && + 
cryptodev_globals.devs[i].attached == RTE_CRYPTODEV_ATTACHED) dev_count++; @@ -568,8 +566,8 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, uint8_t nb_devices) { uint8_t i, count = 0; - struct rte_cryptodev *devs = rte_cryptodev_globals->devs; - uint8_t max_devs = rte_cryptodev_globals->max_devs; + struct rte_cryptodev *devs = cryptodev_globals.devs; + uint8_t max_devs = cryptodev_globals.max_devs; for (i = 0; i < max_devs && count < nb_devices; i++) { @@ -1477,6 +1475,9 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type, elt_size += sizeof(struct rte_crypto_sym_op); } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) { elt_size += sizeof(struct rte_crypto_asym_op); + } else if (type == RTE_CRYPTO_OP_TYPE_UNDEFINED) { + elt_size += RTE_MAX(sizeof(struct rte_crypto_sym_op), + sizeof(struct rte_crypto_asym_op)); } else { CDEV_LOG_ERR("Invalid op_type\n"); return NULL; diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c index 2088ac3f..f03bdbd5 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.c +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c @@ -93,24 +93,20 @@ rte_cryptodev_pmd_create(const char *name, struct rte_cryptodev *cryptodev; if (params->name[0] != '\0') { - CDEV_LOG_INFO("[%s] User specified device name = %s\n", - device->driver->name, params->name); + CDEV_LOG_INFO("User specified device name = %s\n", params->name); name = params->name; } - CDEV_LOG_INFO("[%s] - Creating cryptodev %s\n", - device->driver->name, name); + CDEV_LOG_INFO("Creating cryptodev %s\n", name); - CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s," + CDEV_LOG_INFO("Initialisation parameters - name: %s," "socket id: %d, max queue pairs: %u", - device->driver->name, name, - params->socket_id, params->max_nb_queue_pairs); + name, params->socket_id, params->max_nb_queue_pairs); /* allocate device structure */ cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id); if (cryptodev == NULL) { - CDEV_LOG_ERR("[%s] Failed to allocate crypto device for %s", - device->driver->name, name); + CDEV_LOG_ERR("Failed to allocate crypto device for %s", name); return NULL; } @@ -123,9 +119,8 @@ rte_cryptodev_pmd_create(const char *name, params->socket_id); if (cryptodev->data->dev_private == NULL) { - CDEV_LOG_ERR("[%s] Cannot allocate memory for " - "cryptodev %s private data", - device->driver->name, name); + CDEV_LOG_ERR("Cannot allocate memory for cryptodev %s" + " private data", name); rte_cryptodev_pmd_release_device(cryptodev); return NULL; @@ -145,9 +140,7 @@ rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev) { int retval; - CDEV_LOG_INFO("[%s] Closing crypto device %s", - cryptodev->device->driver->name, - cryptodev->device->name); + CDEV_LOG_INFO("Closing crypto device %s", cryptodev->device->name); /* free crypto device */ retval = rte_cryptodev_pmd_release_device(cryptodev); diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index 6ff49d64..1b6cafd1 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -71,9 +71,6 @@ struct cryptodev_driver { uint8_t id; }; -/** pointer to global crypto devices data structure. */ -extern struct rte_cryptodev_global *rte_cryptodev_globals; - /** * Get the rte_cryptodev structure device pointer for the device. Assumes a * valid device index. 
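The cryptodev hunk above also adds an `RTE_CRYPTO_OP_TYPE_UNDEFINED` case to `rte_crypto_op_pool_create()`, sizing each element to the larger of the symmetric and asymmetric op structures so one pool can back both. A hedged sketch of how an application might use such a mixed pool (the pool name, element counts and the absence of error cleanup are illustrative only):

```c
/* Sketch: one op pool that can serve both symmetric and asymmetric
 * crypto operations, relying on the RTE_CRYPTO_OP_TYPE_UNDEFINED
 * handling added in this release. */
#include <rte_eal.h>
#include <rte_mempool.h>
#include <rte_crypto.h>

int
main(int argc, char **argv)
{
	struct rte_mempool *pool;
	struct rte_crypto_op *sym_op, *asym_op;

	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* Elements are sized to RTE_MAX(sym, asym), so either op type
	 * can be carved out of the same pool. */
	pool = rte_crypto_op_pool_create("mixed_op_pool",
			RTE_CRYPTO_OP_TYPE_UNDEFINED,
			1024 /* nb_elts */, 128 /* cache_size */,
			0 /* priv_size */, SOCKET_ID_ANY);
	if (pool == NULL)
		return -1;

	sym_op = rte_crypto_op_alloc(pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC);
	asym_op = rte_crypto_op_alloc(pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);

	rte_crypto_op_free(sym_op);
	rte_crypto_op_free(asym_op);
	return 0;
}
```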
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index d27da3d1..bfeddaad 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -22,7 +22,7 @@ LDLIBS += -lrte_kvargs EXPORT_MAP := ../../rte_eal_version.map -LIBABIVER := 8 +LIBABIVER := 9 # specific to bsdapp exec-env SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c @@ -62,10 +62,12 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += hotplug_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_option.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c @@ -77,11 +79,6 @@ SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) -CFLAGS_eal.o := -D_GNU_SOURCE -#CFLAGS_eal_thread.o := -D_GNU_SOURCE -CFLAGS_eal_log.o := -D_GNU_SOURCE -CFLAGS_eal_common_log.o := -D_GNU_SOURCE - # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index d7ae9d68..508cbc46 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -42,6 +42,7 @@ #include <rte_devargs.h> #include <rte_version.h> #include <rte_vfio.h> +#include <rte_option.h> #include <rte_atomic.h> #include <malloc_heap.h> @@ -141,7 +142,7 @@ eal_create_runtime_dir(void) } const char * -eal_get_runtime_dir(void) +rte_eal_get_runtime_dir(void) { return runtime_dir; } @@ -414,12 +415,20 @@ eal_parse_args(int argc, char **argv) argvopt = argv; optind = 1; optreset = 1; + opterr = 0; while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { - /* getopt is not happy, stop right now */ + /* + * getopt didn't recognise the option, lets parse the + * registered options to see if the flag is valid + */ if (opt == '?') { + ret = rte_option_parse(argv[optind-1]); + if (ret == 0) + continue; + eal_usage(prgname); ret = -1; goto out; @@ -502,6 +511,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg) { int *socket_id = arg; + if (msl->external) + return 0; + if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0) return 1; @@ -607,7 +619,7 @@ rte_eal_init(int argc, char **argv) internal_config.legacy_mem = true; if (eal_plugins_init() < 0) { - rte_eal_init_alert("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins"); rte_errno = EINVAL; rte_atomic32_clear(&run_once); return -1; @@ -622,7 +634,7 @@ rte_eal_init(int argc, char **argv) rte_config_init(); if (rte_eal_intr_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + rte_eal_init_alert("Cannot init interrupt-handling thread"); return -1; } @@ -630,7 +642,7 @@ rte_eal_init(int argc, char **argv) * bus through mp channel in the secondary process before the bus scan. 
*/ if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); + rte_eal_init_alert("failed to init mp channel"); if (rte_eal_process_type() == RTE_PROC_PRIMARY) { rte_errno = EFAULT; return -1; @@ -638,14 +650,21 @@ rte_eal_init(int argc, char **argv) } if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_eal_init_alert("Cannot scan the buses for devices"); rte_errno = ENODEV; rte_atomic32_clear(&run_once); return -1; } - /* autodetect the iova mapping mode (default is iova_pa) */ - rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { + /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ + rte_eal_get_configuration()->iova_mode = + rte_bus_get_iommu_class(); + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; + } if (internal_config.no_hugetlbfs == 0) { /* rte_config isn't initialized yet */ @@ -685,37 +704,37 @@ rte_eal_init(int argc, char **argv) * initialize memzones first. */ if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + rte_eal_init_alert("Cannot init memzone"); rte_errno = ENODEV; return -1; } if (rte_eal_memory_init() < 0) { - rte_eal_init_alert("Cannot init memory\n"); + rte_eal_init_alert("Cannot init memory"); rte_errno = ENOMEM; return -1; } if (rte_eal_malloc_heap_init() < 0) { - rte_eal_init_alert("Cannot init malloc heap\n"); + rte_eal_init_alert("Cannot init malloc heap"); rte_errno = ENODEV; return -1; } if (rte_eal_tailqs_init() < 0) { - rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_eal_init_alert("Cannot init tail queues for objects"); rte_errno = EFAULT; return -1; } if (rte_eal_alarm_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + rte_eal_init_alert("Cannot init interrupt-handling thread"); /* rte_eal_alarm_init sets rte_errno on failure. */ return -1; } if (rte_eal_timer_init() < 0) { - rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_eal_init_alert("Cannot init HPET or TSC timers"); rte_errno = ENOTSUP; return -1; } @@ -765,14 +784,14 @@ rte_eal_init(int argc, char **argv) /* initialize services so vdevs register service during bus_probe. 
*/ ret = rte_service_init(); if (ret) { - rte_eal_init_alert("rte_service_init() failed\n"); + rte_eal_init_alert("rte_service_init() failed"); rte_errno = ENOEXEC; return -1; } /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { - rte_eal_init_alert("Cannot probe devices\n"); + rte_eal_init_alert("Cannot probe devices"); rte_errno = ENOTSUP; return -1; } @@ -788,6 +807,9 @@ rte_eal_init(int argc, char **argv) rte_eal_mcfg_complete(); + /* Call each registered callback, if enabled */ + rte_option_init(); + return fctret; } diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c index 1c6c51bd..255d611e 100644 --- a/lib/librte_eal/bsdapp/eal/eal_dev.c +++ b/lib/librte_eal/bsdapp/eal/eal_dev.c @@ -19,3 +19,17 @@ rte_dev_event_monitor_stop(void) RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); return -1; } + +int __rte_experimental +rte_dev_hotplug_handle_enable(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} + +int __rte_experimental +rte_dev_hotplug_handle_disable(void) +{ + RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n"); + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c index f7f07abd..a5847f0b 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memalloc.c +++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c @@ -4,6 +4,7 @@ #include <inttypes.h> +#include <rte_errno.h> #include <rte_log.h> #include <rte_memory.h> @@ -48,6 +49,26 @@ eal_memalloc_sync_with_primary(void) } int +eal_memalloc_get_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused) +{ + return -ENOTSUP; +} + +int +eal_memalloc_set_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused, + int fd __rte_unused) +{ + return -ENOTSUP; +} + +int +eal_memalloc_get_seg_fd_offset(int list_idx __rte_unused, + int seg_idx __rte_unused, size_t *offset __rte_unused) +{ + return -ENOTSUP; +} + +int eal_memalloc_init(void) { return 0; diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c index 16d2bc7c..4b092e1f 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -79,6 +79,7 @@ rte_eal_hugepage_init(void) } msl->base_va = addr; msl->page_sz = page_sz; + msl->len = internal_config.memory; msl->socket_id = 0; /* populate memsegs. 
each memseg is 1 page long */ @@ -235,12 +236,15 @@ struct attach_walk_args { int seg_idx; }; static int -attach_segment(const struct rte_memseg_list *msl __rte_unused, - const struct rte_memseg *ms, void *arg) +attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) { struct attach_walk_args *wa = arg; void *addr; + if (msl->external) + return 0; + addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, wa->fd_hugepage, wa->seg_idx * EAL_PAGE_SIZE); @@ -370,6 +374,7 @@ alloc_va_space(struct rte_memseg_list *msl) return -1; } msl->base_va = addr; + msl->len = mem_sz; return 0; } diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index cca68826..87d8c455 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -12,6 +12,7 @@ INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h +INC += rte_option.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h diff --git a/lib/librte_eal/common/arch/arm/meson.build b/lib/librte_eal/common/arch/arm/meson.build index c6bd9227..79731e1a 100644 --- a/lib/librte_eal/common/arch/arm/meson.build +++ b/lib/librte_eal/common/arch/arm/meson.build @@ -2,4 +2,4 @@ # Copyright(c) 2017 Intel Corporation. eal_common_arch_sources = files('rte_cpuflags.c', - 'rte_cycles.c') + 'rte_cycles.c', 'rte_hypervisor.c') diff --git a/lib/librte_eal/common/arch/ppc_64/meson.build b/lib/librte_eal/common/arch/ppc_64/meson.build new file mode 100644 index 00000000..40b3dc53 --- /dev/null +++ b/lib/librte_eal/common/arch/ppc_64/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +eal_common_arch_sources = files('rte_cpuflags.c', + 'rte_cycles.c', 'rte_hypervisor.c') diff --git a/lib/librte_eal/common/arch/x86/meson.build b/lib/librte_eal/common/arch/x86/meson.build index 4e0f7790..14bf204c 100644 --- a/lib/librte_eal/common/arch/x86/meson.build +++ b/lib/librte_eal/common/arch/x86/meson.build @@ -2,4 +2,4 @@ # Copyright(c) 2017 Intel Corporation eal_common_arch_sources = files('rte_spinlock.c', 'rte_cpuflags.c', - 'rte_cycles.c') + 'rte_cycles.c', 'rte_hypervisor.c') diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c index 0943851c..c8f1901f 100644 --- a/lib/librte_eal/common/eal_common_bus.c +++ b/lib/librte_eal/common/eal_common_bus.c @@ -37,10 +37,11 @@ #include <rte_bus.h> #include <rte_debug.h> #include <rte_string_fns.h> +#include <rte_errno.h> #include "eal_private.h" -struct rte_bus_list rte_bus_list = +static struct rte_bus_list rte_bus_list = TAILQ_HEAD_INITIALIZER(rte_bus_list); void @@ -242,3 +243,45 @@ rte_bus_get_iommu_class(void) } return mode; } + +static int +bus_handle_sigbus(const struct rte_bus *bus, + const void *failure_addr) +{ + int ret; + + if (!bus->sigbus_handler) + return -1; + + ret = bus->sigbus_handler(failure_addr); + + /* find bus but handle failed, keep the errno be set. */ + if (ret < 0 && rte_errno == 0) + rte_errno = ENOTSUP; + + return ret > 0; +} + +int +rte_bus_sigbus_handler(const void *failure_addr) +{ + struct rte_bus *bus; + + int ret = 0; + int old_errno = rte_errno; + + rte_errno = 0; + + bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr); + /* can not find bus. 
*/ + if (!bus) + return 1; + /* find bus but handle failed, pass on the new errno. */ + else if (rte_errno != 0) + return -1; + + /* restore the old errno. */ + rte_errno = old_errno; + + return ret; +} diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c index 404a9065..d922266d 100644 --- a/lib/librte_eal/common/eal_common_class.c +++ b/lib/librte_eal/common/eal_common_class.c @@ -9,7 +9,7 @@ #include <rte_class.h> #include <rte_debug.h> -struct rte_class_list rte_class_list = +static struct rte_class_list rte_class_list = TAILQ_HEAD_INITIALIZER(rte_class_list); __rte_experimental void diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index 678dbcac..62e9ed47 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -19,8 +19,10 @@ #include <rte_log.h> #include <rte_spinlock.h> #include <rte_malloc.h> +#include <rte_string_fns.h> #include "eal_private.h" +#include "hotplug_mp.h" /** * The device event callback description. @@ -74,119 +76,110 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name) return strcmp(dev->name, name); } -int rte_eal_dev_attach(const char *name, const char *devargs) +int __rte_experimental +rte_dev_is_probed(const struct rte_device *dev) { - struct rte_bus *bus; + /* The field driver should be set only when the probe is successful. */ + return dev->driver != NULL; +} - if (name == NULL || devargs == NULL) { - RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n"); +/* helper function to build devargs, caller should free the memory */ +static int +build_devargs(const char *busname, const char *devname, + const char *drvargs, char **devargs) +{ + int length; + + length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs); + if (length < 0) return -EINVAL; - } - bus = rte_bus_find_by_device_name(name); - if (bus == NULL) { - RTE_LOG(ERR, EAL, "Unable to find a bus for the device '%s'\n", - name); + *devargs = malloc(length + 1); + if (*devargs == NULL) + return -ENOMEM; + + length = snprintf(*devargs, length + 1, "%s:%s,%s", + busname, devname, drvargs); + if (length < 0) { + free(*devargs); return -EINVAL; } - if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0) - return rte_eal_hotplug_add(bus->name, name, devargs); - - RTE_LOG(ERR, EAL, - "Device attach is only supported for PCI and vdev devices.\n"); - return -ENOTSUP; + return 0; } -int rte_eal_dev_detach(struct rte_device *dev) +int +rte_eal_hotplug_add(const char *busname, const char *devname, + const char *drvargs) { - struct rte_bus *bus; - int ret; - if (dev == NULL) { - RTE_LOG(ERR, EAL, "Invalid device provided.\n"); - return -EINVAL; - } + char *devargs; + int ret; - bus = rte_bus_find_by_device(dev); - if (bus == NULL) { - RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", - dev->name); - return -EINVAL; - } + ret = build_devargs(busname, devname, drvargs, &devargs); + if (ret != 0) + return ret; - if (bus->unplug == NULL) { - RTE_LOG(ERR, EAL, "Bus function not supported\n"); - return -ENOTSUP; - } + ret = rte_dev_probe(devargs); + free(devargs); - ret = bus->unplug(dev); - if (ret) - RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", - dev->name); return ret; } -int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname, - const char *devargs) +/* probe device at local process. 
*/ +int +local_dev_probe(const char *devargs, struct rte_device **new_dev) { - struct rte_bus *bus; struct rte_device *dev; struct rte_devargs *da; int ret; - bus = rte_bus_find_by_name(busname); - if (bus == NULL) { - RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname); - return -ENOENT; - } - - if (bus->plug == NULL) { - RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n", - bus->name); - return -ENOTSUP; - } - + *new_dev = NULL; da = calloc(1, sizeof(*da)); if (da == NULL) return -ENOMEM; - ret = rte_devargs_parsef(da, "%s:%s,%s", - busname, devname, devargs); + ret = rte_devargs_parse(da, devargs); if (ret) goto err_devarg; + if (da->bus->plug == NULL) { + RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n", + da->bus->name); + ret = -ENOTSUP; + goto err_devarg; + } + ret = rte_devargs_insert(da); if (ret) goto err_devarg; - ret = bus->scan(); + ret = da->bus->scan(); if (ret) goto err_devarg; - dev = bus->find_device(NULL, cmp_dev_name, devname); + dev = da->bus->find_device(NULL, cmp_dev_name, da->name); if (dev == NULL) { RTE_LOG(ERR, EAL, "Cannot find device (%s)\n", - devname); + da->name); ret = -ENODEV; goto err_devarg; } - if (dev->driver != NULL) { - RTE_LOG(ERR, EAL, "Device is already plugged\n"); - return -EEXIST; - } - - ret = bus->plug(dev); + ret = dev->bus->plug(dev); if (ret) { + if (rte_dev_is_probed(dev)) /* if already succeeded earlier */ + return ret; /* no rollback */ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", dev->name); goto err_devarg; } + + *new_dev = dev; return 0; err_devarg: - if (rte_devargs_remove(busname, devname)) { + if (rte_devargs_remove(da) != 0) { free(da->args); free(da); } @@ -194,40 +187,235 @@ err_devarg: } int __rte_experimental -rte_eal_hotplug_remove(const char *busname, const char *devname) +rte_dev_probe(const char *devargs) { - struct rte_bus *bus; + struct eal_dev_mp_req req; struct rte_device *dev; int ret; + memset(&req, 0, sizeof(req)); + req.t = EAL_DEV_REQ_TYPE_ATTACH; + strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + /** + * If in secondary process, just send IPC request to + * primary process. + */ + ret = eal_dev_hotplug_request_to_primary(&req); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to send hotplug request to primary\n"); + return -ENOMSG; + } + if (req.result != 0) + RTE_LOG(ERR, EAL, + "Failed to hotplug add device\n"); + return req.result; + } + + /* attach a shared device from primary start from here: */ + + /* primary attach the new device itself. */ + ret = local_dev_probe(devargs, &dev); + + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to attach device on primary process\n"); + + /** + * it is possible that secondary process failed to attached a + * device that primary process have during initialization, + * so for -EEXIST case, we still need to sync with secondary + * process. + */ + if (ret != -EEXIST) + return ret; + } + + /* primary send attach sync request to secondary. */ + ret = eal_dev_hotplug_request_to_secondary(&req); + + /* if any communication error, we need to rollback. */ + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to send hotplug add request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + + /** + * if any secondary failed to attach, we need to consider if rollback + * is necessary. + */ + if (req.result != 0) { + RTE_LOG(ERR, EAL, + "Failed to attach device on secondary process\n"); + ret = req.result; + + /* for -EEXIST, we don't need to rollback. 
*/ + if (ret == -EEXIST) + return ret; + goto rollback; + } + + return 0; + +rollback: + req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; + + /* primary send rollback request to secondary. */ + if (eal_dev_hotplug_request_to_secondary(&req) != 0) + RTE_LOG(WARNING, EAL, + "Failed to rollback device attach on secondary." + "Devices in secondary may not sync with primary\n"); + + /* primary rollback itself. */ + if (local_dev_remove(dev) != 0) + RTE_LOG(WARNING, EAL, + "Failed to rollback device attach on primary." + "Devices in secondary may not sync with primary\n"); + + return ret; +} + +int +rte_eal_hotplug_remove(const char *busname, const char *devname) +{ + struct rte_device *dev; + struct rte_bus *bus; + bus = rte_bus_find_by_name(busname); if (bus == NULL) { RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname); return -ENOENT; } - if (bus->unplug == NULL) { - RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n", - bus->name); - return -ENOTSUP; - } - dev = bus->find_device(NULL, cmp_dev_name, devname); if (dev == NULL) { RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname); return -EINVAL; } - if (dev->driver == NULL) { - RTE_LOG(ERR, EAL, "Device is already unplugged\n"); - return -ENOENT; + return rte_dev_remove(dev); +} + +/* remove device at local process. */ +int +local_dev_remove(struct rte_device *dev) +{ + int ret; + + if (dev->bus->unplug == NULL) { + RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n", + dev->bus->name); + return -ENOTSUP; } - ret = bus->unplug(dev); - if (ret) + ret = dev->bus->unplug(dev); + if (ret) { RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", dev->name); - rte_devargs_remove(busname, devname); + return ret; + } + + return 0; +} + +int __rte_experimental +rte_dev_remove(struct rte_device *dev) +{ + struct eal_dev_mp_req req; + char *devargs; + int ret; + + if (!rte_dev_is_probed(dev)) { + RTE_LOG(ERR, EAL, "Device is not probed\n"); + return -ENOENT; + } + + ret = build_devargs(dev->bus->name, dev->name, "", &devargs); + if (ret != 0) + return ret; + + memset(&req, 0, sizeof(req)); + req.t = EAL_DEV_REQ_TYPE_DETACH; + strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN); + free(devargs); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + /** + * If in secondary process, just send IPC request to + * primary process. + */ + ret = eal_dev_hotplug_request_to_primary(&req); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to send hotplug request to primary\n"); + return -ENOMSG; + } + if (req.result != 0) + RTE_LOG(ERR, EAL, + "Failed to hotplug remove device\n"); + return req.result; + } + + /* detach a device from primary start from here: */ + + /* primary send detach sync request to secondary */ + ret = eal_dev_hotplug_request_to_secondary(&req); + + /** + * if communication error, we need to rollback, because it is possible + * part of the secondary processes still detached it successfully. + */ + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to send device detach request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + + /** + * if any secondary failed to detach, we need to consider if rollback + * is necessary. + */ + if (req.result != 0) { + RTE_LOG(ERR, EAL, + "Failed to detach device on secondary process\n"); + ret = req.result; + /** + * if -ENOENT, we don't need to rollback, since devices is + * already detached on secondary process. + */ + if (ret != -ENOENT) + goto rollback; + } + + /* primary detach the device itself. 
*/ + ret = local_dev_remove(dev); + + /* if primary failed, still need to consider if rollback is necessary */ + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to detach device on primary process\n"); + /* if -ENOENT, we don't need to rollback */ + if (ret == -ENOENT) + return ret; + goto rollback; + } + + return 0; + +rollback: + req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK; + + /* primary send rollback request to secondary. */ + if (eal_dev_hotplug_request_to_secondary(&req) != 0) + RTE_LOG(WARNING, EAL, + "Failed to rollback device detach on secondary." + "Devices in secondary may not sync with primary\n"); + return ret; } @@ -342,8 +530,9 @@ rte_dev_event_callback_unregister(const char *device_name, return ret; } -void -dev_callback_process(char *device_name, enum rte_dev_event_type event) +void __rte_experimental +rte_dev_event_callback_process(const char *device_name, + enum rte_dev_event_type event) { struct dev_event_callback *cb_lst; diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index dac2402a..b7b9cb69 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -4,9 +4,6 @@ /* This file manages the list of devices and their arguments, as given * by the user at startup - * - * Code here should not call rte_log since the EAL environment - * may not be initialized. */ #include <stdio.h> @@ -28,39 +25,9 @@ TAILQ_HEAD(rte_devargs_list, rte_devargs); /** Global list of user devices */ -struct rte_devargs_list devargs_list = +static struct rte_devargs_list devargs_list = TAILQ_HEAD_INITIALIZER(devargs_list); -int -rte_eal_parse_devargs_str(const char *devargs_str, - char **drvname, char **drvargs) -{ - char *sep; - - if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL)) - return -1; - - *drvname = strdup(devargs_str); - if (*drvname == NULL) - return -1; - - /* set the first ',' to '\0' to split name and arguments */ - sep = strchr(*drvname, ','); - if (sep != NULL) { - sep[0] = '\0'; - *drvargs = strdup(sep + 1); - } else { - *drvargs = strdup(""); - } - - if (*drvargs == NULL) { - free(*drvname); - *drvname = NULL; - return -1; - } - return 0; -} - static size_t devargs_layer_count(const char *s) { @@ -270,6 +237,7 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...) va_list ap; size_t len; char *dev; + int ret; if (da == NULL) return -EINVAL; @@ -288,7 +256,10 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...) 
vsnprintf(dev, len + 1, format, ap); va_end(ap); - return rte_devargs_parse(da, dev); + ret = rte_devargs_parse(da, dev); + + free(dev); + return ret; } int __rte_experimental @@ -296,7 +267,7 @@ rte_devargs_insert(struct rte_devargs *da) { int ret; - ret = rte_devargs_remove(da->bus->name, da->name); + ret = rte_devargs_remove(da); if (ret < 0) return ret; TAILQ_INSERT_TAIL(&devargs_list, da, next); @@ -342,14 +313,17 @@ fail: } int __rte_experimental -rte_devargs_remove(const char *busname, const char *devname) +rte_devargs_remove(struct rte_devargs *devargs) { struct rte_devargs *d; void *tmp; + if (devargs == NULL || devargs->bus == NULL) + return -1; + TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) { - if (strcmp(d->bus->name, busname) == 0 && - strcmp(d->name, devname) == 0) { + if (strcmp(d->bus->name, devargs->bus->name) == 0 && + strcmp(d->name, devargs->name) == 0) { TAILQ_REMOVE(&devargs_list, d, next); free(d->args); free(d); diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c index 43caf3ce..ea0735cb 100644 --- a/lib/librte_eal/common/eal_common_fbarray.c +++ b/lib/librte_eal/common/eal_common_fbarray.c @@ -2,6 +2,7 @@ * Copyright(c) 2017-2018 Intel Corporation */ +#include <fcntl.h> #include <inttypes.h> #include <limits.h> #include <sys/mman.h> @@ -878,6 +879,10 @@ rte_fbarray_destroy(struct rte_fbarray *arr) if (ret) return ret; + /* with no shconf, there were never any files to begin with */ + if (internal_config.no_shconf) + return 0; + /* try deleting the file */ eal_get_fbarray_path(path, sizeof(path), arr->name); diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index fbfb1b05..12dcedf5 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -2,6 +2,7 @@ * Copyright(c) 2010-2014 Intel Corporation */ +#include <fcntl.h> #include <errno.h> #include <stdio.h> #include <stdint.h> @@ -37,6 +38,23 @@ static void *next_baseaddr; static uint64_t system_page_sz; +#ifdef RTE_ARCH_64 +/* + * Linux kernel uses a really high address as starting address for serving + * mmaps calls. If there exists addressing limitations and IOVA mode is VA, + * this starting address is likely too high for those devices. However, it + * is possible to use a lower address in the process virtual address space + * as with 64 bits there is a lot of available space. + * + * Current known limitations are 39 or 40 bits. Setting the starting address + * at 4GB implies there are 508GB or 1020GB for mapping the available + * hugepages. This is likely enough for most systems, although a device with + * addressing limitations should call rte_eal_check_dma_mask for ensuring all + * memory is within supported range. 
+ */ +static uint64_t baseaddr = 0x100000000; +#endif + void * eal_get_virtual_area(void *requested_addr, size_t *size, size_t page_sz, int flags, int mmap_flags) @@ -60,6 +78,11 @@ eal_get_virtual_area(void *requested_addr, size_t *size, rte_eal_process_type() == RTE_PROC_PRIMARY) next_baseaddr = (void *) internal_config.base_virtaddr; +#ifdef RTE_ARCH_64 + if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 && + rte_eal_process_type() == RTE_PROC_PRIMARY) + next_baseaddr = (void *) baseaddr; +#endif if (requested_addr == NULL && next_baseaddr != NULL) { requested_addr = next_baseaddr; requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz); @@ -91,7 +114,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size, mmap_flags, -1, 0); if (mapped_addr == MAP_FAILED && allow_shrink) *size -= page_sz; - } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0); + + if (mapped_addr != MAP_FAILED && addr_is_hint && + mapped_addr != requested_addr) { + /* hint was not used. Try with another offset */ + munmap(mapped_addr, map_sz); + mapped_addr = MAP_FAILED; + next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz); + requested_addr = next_baseaddr; + } + } while ((allow_shrink || addr_is_hint) && + mapped_addr == MAP_FAILED && *size > 0); /* align resulting address - if map failed, we will ignore the value * anyway, so no need to add additional checks. @@ -171,7 +204,7 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl) /* a memseg list was specified, check if it's the right one */ start = msl->base_va; - end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len); + end = RTE_PTR_ADD(start, msl->len); if (addr < start || addr >= end) return NULL; @@ -194,8 +227,7 @@ virt2memseg_list(const void *addr) msl = &mcfg->memsegs[msl_idx]; start = msl->base_va; - end = RTE_PTR_ADD(start, - (size_t)msl->page_sz * msl->memseg_arr.len); + end = RTE_PTR_ADD(start, msl->len); if (addr >= start && addr < end) break; } @@ -273,6 +305,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg) { uint64_t *total_len = arg; + if (msl->external) + return 0; + *total_len += msl->memseg_arr.count * msl->page_sz; return 0; @@ -294,7 +329,7 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int msl_idx, ms_idx; + int msl_idx, ms_idx, fd; FILE *f = arg; msl_idx = msl - mcfg->memsegs; @@ -305,10 +340,11 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, if (ms_idx < 0) return -1; + fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx); fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, " "virt:%p, socket_id:%"PRId32", " "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " - "nrank:%"PRIx32"\n", + "nrank:%"PRIx32" fd:%i\n", msl_idx, ms_idx, ms->iova, ms->len, @@ -316,7 +352,8 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms, ms->socket_id, ms->hugepage_sz, ms->nchannel, - ms->nrank); + ms->nrank, + fd); return 0; } @@ -383,6 +420,66 @@ rte_dump_physmem_layout(FILE *f) rte_memseg_walk(dump_memseg, f); } +static int +check_iova(const struct rte_memseg_list *msl __rte_unused, + const struct rte_memseg *ms, void *arg) +{ + uint64_t *mask = arg; + rte_iova_t iova; + + /* higher address within segment */ + iova = (ms->iova + ms->len) - 1; + if (!(iova & *mask)) + return 0; + + RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n", + ms->iova, ms->len); + + RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", 
*mask); + return 1; +} + +#if defined(RTE_ARCH_64) +#define MAX_DMA_MASK_BITS 63 +#else +#define MAX_DMA_MASK_BITS 31 +#endif + +/* check memseg iovas are within the required range based on dma mask */ +int __rte_experimental +rte_eal_check_dma_mask(uint8_t maskbits) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + uint64_t mask; + + /* sanity check */ + if (maskbits > MAX_DMA_MASK_BITS) { + RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n", + maskbits, MAX_DMA_MASK_BITS); + return -1; + } + + /* create dma mask */ + mask = ~((1ULL << maskbits) - 1); + + if (rte_memseg_walk(check_iova, &mask)) + /* + * Dma mask precludes hugepage usage. + * This device can not be used and we do not need to keep + * the dma mask. + */ + return 1; + + /* + * we need to keep the more restricted maskbit for checking + * potential dynamic memory allocation in the future. + */ + mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits : + RTE_MIN(mcfg->dma_maskbits, maskbits); + + return 0; +} + /* return the number of memory channels */ unsigned rte_memory_get_nchannel(void) { @@ -548,6 +645,105 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) return ret; } +int __rte_experimental +rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + int msl_idx, seg_idx, ret; + + if (ms == NULL) { + rte_errno = EINVAL; + return -1; + } + + msl = rte_mem_virt2memseg_list(ms->addr); + if (msl == NULL) { + rte_errno = EINVAL; + return -1; + } + arr = &msl->memseg_arr; + + msl_idx = msl - mcfg->memsegs; + seg_idx = rte_fbarray_find_idx(arr, ms); + + if (!rte_fbarray_is_used(arr, seg_idx)) { + rte_errno = ENOENT; + return -1; + } + + ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx); + if (ret < 0) { + rte_errno = -ret; + ret = -1; + } + return ret; +} + +int __rte_experimental +rte_memseg_get_fd(const struct rte_memseg *ms) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret; + + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_get_fd_thread_unsafe(ms); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int __rte_experimental +rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, + size_t *offset) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl; + struct rte_fbarray *arr; + int msl_idx, seg_idx, ret; + + if (ms == NULL || offset == NULL) { + rte_errno = EINVAL; + return -1; + } + + msl = rte_mem_virt2memseg_list(ms->addr); + if (msl == NULL) { + rte_errno = EINVAL; + return -1; + } + arr = &msl->memseg_arr; + + msl_idx = msl - mcfg->memsegs; + seg_idx = rte_fbarray_find_idx(arr, ms); + + if (!rte_fbarray_is_used(arr, seg_idx)) { + rte_errno = ENOENT; + return -1; + } + + ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset); + if (ret < 0) { + rte_errno = -ret; + ret = -1; + } + return ret; +} + +int __rte_experimental +rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret; + + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + /* init memory subsystem */ int rte_eal_memory_init(void) diff --git a/lib/librte_eal/common/eal_common_memzone.c 
b/lib/librte_eal/common/eal_common_memzone.c index 7300fe05..b7081afb 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -120,13 +120,15 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - if ((socket_id != SOCKET_ID_ANY) && - (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) { + if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) { rte_errno = EINVAL; return NULL; } - if (!rte_eal_has_hugepages()) + /* only set socket to SOCKET_ID_ANY if we aren't allocating for an + * external heap. + */ + if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES) socket_id = SOCKET_ID_ANY; contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0; diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index dd5f9740..b82f3ddd 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -58,6 +58,7 @@ eal_long_options[] = { {OPT_HELP, 0, NULL, OPT_HELP_NUM }, {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM }, {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, @@ -205,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) #endif internal_cfg->vmware_tsc_map = 0; internal_cfg->create_uio_dev = 0; + internal_cfg->iova_mode = RTE_IOVA_DC; internal_cfg->user_mbuf_pool_ops_name = NULL; internal_cfg->init_complete = 0; } @@ -1075,6 +1077,25 @@ eal_parse_proc_type(const char *arg) return RTE_PROC_INVALID; } +static int +eal_parse_iova_mode(const char *name) +{ + int mode; + + if (name == NULL) + return -1; + + if (!strcmp("pa", name)) + mode = RTE_IOVA_PA; + else if (!strcmp("va", name)) + mode = RTE_IOVA_VA; + else + return -1; + + internal_config.iova_mode = mode; + return 0; +} + int eal_parse_common_option(int opt, const char *optarg, struct internal_config *conf) @@ -1281,6 +1302,13 @@ eal_parse_common_option(int opt, const char *optarg, case OPT_SINGLE_FILE_SEGMENTS_NUM: conf->single_file_segments = 1; break; + case OPT_IOVA_MODE_NUM: + if (eal_parse_iova_mode(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_IOVA_MODE "\n"); + return -1; + } + break; /* don't know what to do, leave this to caller */ default: @@ -1384,10 +1412,16 @@ eal_check_common_options(struct internal_config *internal_cfg) " is only supported in non-legacy memory mode\n"); } if (internal_cfg->single_file_segments && - internal_cfg->hugepage_unlink) { + internal_cfg->hugepage_unlink && + !internal_cfg->in_memory) { RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is " - "not compatible with neither --"OPT_IN_MEMORY" nor " - "--"OPT_HUGE_UNLINK"\n"); + "not compatible with --"OPT_HUGE_UNLINK"\n"); + return -1; + } + if (internal_cfg->legacy_mem && + internal_cfg->in_memory) { + RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible " + "with --"OPT_IN_MEMORY"\n"); return -1; } @@ -1428,6 +1462,8 @@ eal_common_usage(void) " --"OPT_VDEV" Add a virtual device.\n" " The argument format is <driver><id>[,key=val,...]\n" " (ex: --vdev=net_pcap0,iface=eth2).\n" + " --"OPT_IOVA_MODE" Set IOVA mode. 
'pa' for IOVA_PA\n" + " 'va' for IOVA_VA\n" " -d LIB.so|DIR Add a driver or driver directory\n" " (can be used multiple times)\n" " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 9fcb9121..97663d3b 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -939,13 +939,17 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (check_input(req) == false) return -1; + reply->nb_sent = 0; + reply->nb_received = 0; + reply->msgs = NULL; + if (internal_config.no_shconf) { RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); return 0; } if (gettimeofday(&now, NULL) < 0) { - RTE_LOG(ERR, EAL, "Faile to get current time\n"); + RTE_LOG(ERR, EAL, "Failed to get current time\n"); rte_errno = errno; return -1; } @@ -954,10 +958,6 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, end.tv_sec = now.tv_sec + ts->tv_sec + (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; - reply->nb_sent = 0; - reply->nb_received = 0; - reply->msgs = NULL; - /* for secondary process, send request to the primary process only */ if (rte_eal_process_type() == RTE_PROC_SECONDARY) { pthread_mutex_lock(&pending_requests.lock); diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c index 6ac5f828..60c5dd66 100644 --- a/lib/librte_eal/common/eal_common_string_fns.c +++ b/lib/librte_eal/common/eal_common_string_fns.c @@ -38,3 +38,29 @@ einval_error: errno = EINVAL; return -1; } + +/* Copy src string into dst. + * + * Return negative value and NUL-terminate if dst is too short, + * Otherwise return number of bytes copied. + */ +ssize_t +rte_strscpy(char *dst, const char *src, size_t dsize) +{ + size_t nleft = dsize; + size_t res = 0; + + /* Copy as many bytes as will fit. */ + while (nleft != 0) { + dst[res] = src[res]; + if (src[res] == '\0') + return res; + res++; + nleft--; + } + + /* Not enough room in dst, set NUL and return error. */ + if (res != 0) + dst[res - 1] = '\0'; + return -E2BIG; +} diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c index 2e2b770f..dcf26bfe 100644 --- a/lib/librte_eal/common/eal_common_timer.c +++ b/lib/librte_eal/common/eal_common_timer.c @@ -7,9 +7,11 @@ #include <unistd.h> #include <inttypes.h> #include <sys/types.h> +#include <time.h> #include <errno.h> #include <rte_common.h> +#include <rte_compat.h> #include <rte_log.h> #include <rte_cycles.h> #include <rte_pause.h> @@ -31,6 +33,28 @@ rte_delay_us_block(unsigned int us) rte_pause(); } +void __rte_experimental +rte_delay_us_sleep(unsigned int us) +{ + struct timespec wait[2]; + int ind = 0; + + wait[0].tv_sec = 0; + if (us >= US_PER_S) { + wait[0].tv_sec = us / US_PER_S; + us -= wait[0].tv_sec * US_PER_S; + } + wait[0].tv_nsec = 1000 * us; + + while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) { + /* + * Sleep was interrupted. Flip the index, so the 'remainder' + * will become the 'request' for a next call. 
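The new --iova-mode switch is consumed by rte_eal_init() like any other EAL argument; below is a minimal sketch of forcing virtual-address IOVA, where the argument list and the decision to force 'va' are illustrative only:

#include <stdio.h>
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_errno.h>

int
main(int argc, char **argv)
{
	/* Force IOVA as VA; eal_parse_iova_mode() maps "va" to RTE_IOVA_VA
	 * and records it in internal_config.iova_mode.
	 */
	char *eal_args[] = { argv[0], "--iova-mode=va" };

	(void)argc;
	if (rte_eal_init(RTE_DIM(eal_args), eal_args) < 0) {
		fprintf(stderr, "EAL init failed: %d\n", rte_errno);
		return 1;
	}
	printf("IOVA mode: %s\n",
		rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA");
	return rte_eal_cleanup();
}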
+ */ + ind = 1 - ind; + } +} + uint64_t rte_get_tsc_hz(void) { diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index de05febf..b3e8ae5e 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -27,7 +27,7 @@ eal_create_runtime_dir(void); /* returns runtime dir */ const char * -eal_get_runtime_dir(void); +rte_eal_get_runtime_dir(void); #define RUNTIME_CONFIG_FNAME "config" static inline const char * @@ -35,7 +35,7 @@ eal_runtime_config_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(), RUNTIME_CONFIG_FNAME); return buffer; } @@ -47,7 +47,7 @@ eal_mp_socket_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(), MP_SOCKET_FNAME); return buffer; } @@ -55,7 +55,8 @@ eal_mp_socket_path(void) #define FBARRAY_NAME_FMT "%s/fbarray_%s" static inline const char * eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) { - snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name); + snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(), + name); return buffer; } @@ -66,7 +67,7 @@ eal_hugepage_info_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(), HUGEPAGE_INFO_FNAME); return buffer; } @@ -78,7 +79,7 @@ eal_hugepage_data_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(), HUGEPAGE_DATA_FNAME); return buffer; } @@ -99,7 +100,7 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id static inline const char * eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id) { - snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(), + snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, rte_eal_get_runtime_dir(), f_id); buffer[buflen - 1] = '\0'; return buffer; diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 00ee6e06..737f17e3 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -70,6 +70,7 @@ struct internal_config { /**< user defined mbuf pool ops name */ unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; + enum rte_iova_mode iova_mode ; /**< Set IOVA mode on this system */ volatile unsigned int init_complete; /**< indicates whether EAL has completed initialization */ }; diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h index 36bb1a02..af917c2f 100644 --- a/lib/librte_eal/common/eal_memalloc.h +++ b/lib/librte_eal/common/eal_memalloc.h @@ -76,6 +76,17 @@ eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id); int eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len); +/* returns fd or -errno */ +int +eal_memalloc_get_seg_fd(int list_idx, int seg_idx); + +/* returns 0 or -errno */ +int +eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd); + +int 
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset); + int eal_memalloc_init(void); diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 96e16678..5271f944 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -63,6 +63,8 @@ enum { OPT_LEGACY_MEM_NUM, #define OPT_SINGLE_FILE_SEGMENTS "single-file-segments" OPT_SINGLE_FILE_SEGMENTS_NUM, +#define OPT_IOVA_MODE "iova-mode" + OPT_IOVA_MODE_NUM, OPT_LONG_MAX_NUM }; diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 4f809a83..442c6dc4 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -259,18 +259,6 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str); int rte_mp_channel_init(void); /** - * Internal Executes all the user application registered callbacks for - * the specific device. It is for DPDK internal user only. User - * application should not call it directly. - * - * @param device_name - * The device name. - * @param event - * the device event type. - */ -void dev_callback_process(char *device_name, enum rte_dev_event_type event); - -/** * @internal * Parse a device string and store its information in an * rte_devargs structure. @@ -304,4 +292,82 @@ int rte_devargs_layers_parse(struct rte_devargs *devargs, const char *devstr); +/* + * probe a device at local process. + * + * @param devargs + * Device arguments including bus, class and driver properties. + * @param new_dev + * new device be probed as output. + * @return + * 0 on success, negative on error. + */ +int local_dev_probe(const char *devargs, struct rte_device **new_dev); + +/** + * Hotplug remove a given device from a specific bus at local process. + * + * @param dev + * Data structure of the device to remove. + * @return + * 0 on success, negative on error. + */ +int local_dev_remove(struct rte_device *dev); + +/** + * Iterate over all buses to find the corresponding bus to handle the sigbus + * error. + * @param failure_addr + * Pointer of the fault address of the sigbus error. + * + * @return + * 0 success to handle the sigbus. + * -1 failed to handle the sigbus + * 1 no bus can handler the sigbus + */ +int rte_bus_sigbus_handler(const void *failure_addr); + +/** + * @internal + * Register the sigbus handler. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +dev_sigbus_handler_register(void); + +/** + * @internal + * Unregister the sigbus handler. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +dev_sigbus_handler_unregister(void); + +/** + * Check if the option is registered. + * + * @param option + * The option to be parsed. + * + * @return + * 0 on success + * @return + * -1 on fail + */ +int +rte_option_parse(const char *opt); + +/** + * Iterate through the registered options and execute the associated + * callback if enabled. 
+ */ +void +rte_option_init(void); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c new file mode 100644 index 00000000..84f59d95 --- /dev/null +++ b/lib/librte_eal/common/hotplug_mp.c @@ -0,0 +1,426 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ +#include <string.h> + +#include <rte_eal.h> +#include <rte_alarm.h> +#include <rte_string_fns.h> +#include <rte_devargs.h> + +#include "hotplug_mp.h" +#include "eal_private.h" + +#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */ + +struct mp_reply_bundle { + struct rte_mp_msg msg; + void *peer; +}; + +static int cmp_dev_name(const struct rte_device *dev, const void *_name) +{ + const char *name = _name; + + return strcmp(dev->name, name); +} + +/** + * Secondary to primary request. + * start from function eal_dev_hotplug_request_to_primary. + * + * device attach on secondary: + * a) secondary send sync request to the primary. + * b) primary receive the request and attach the new device if + * failed goto i). + * c) primary forward attach sync request to all secondary. + * d) secondary receive the request and attach the device and send a reply. + * e) primary check the reply if all success goes to j). + * f) primary send attach rollback sync request to all secondary. + * g) secondary receive the request and detach the device and send a reply. + * h) primary receive the reply and detach device as rollback action. + * i) send attach fail to secondary as a reply of step a), goto k). + * j) send attach success to secondary as a reply of step a). + * k) secondary receive reply and return. + * + * device detach on secondary: + * a) secondary send sync request to the primary. + * b) primary send detach sync request to all secondary. + * c) secondary detach the device and send a reply. + * d) primary check the reply if all success goes to g). + * e) primary send detach rollback sync request to all secondary. + * f) secondary receive the request and attach back device. goto h). + * g) primary detach the device if success goto i), else goto e). + * h) primary send detach fail to secondary as a reply of step a), goto j). + * i) primary send detach success to secondary as a reply of step a). + * j) secondary receive reply and return. 
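The steps above are driven entirely by the EAL; an application only calls the usual hotplug entry points and the request/rollback exchange happens underneath. A rough sketch follows, assuming a secondary process and a hypothetical net_null0 vdev; rte_eal_hotplug_add() and rte_eal_hotplug_remove() are the public wrappers documented later in this patch:

#include <rte_dev.h>
#include <rte_log.h>

/* Attach then detach a hypothetical null vdev. Issued from a secondary
 * process, each call is turned into an eal_dev_hotplug_request_to_primary()
 * and follows the attach/detach sequences described above, including
 * rollback on partial failure.
 */
static int
hotplug_roundtrip(void)
{
	int ret;

	ret = rte_eal_hotplug_add("vdev", "net_null0", "");
	if (ret < 0) {
		RTE_LOG(ERR, USER1, "attach failed: %d\n", ret);
		return ret;
	}

	ret = rte_eal_hotplug_remove("vdev", "net_null0");
	if (ret < 0)
		RTE_LOG(ERR, USER1, "detach failed: %d\n", ret);
	return ret;
}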
+ */ + +static int +send_response_to_secondary(const struct eal_dev_mp_req *req, + int result, + const void *peer) +{ + struct rte_mp_msg mp_resp; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + int ret; + + memset(&mp_resp, 0, sizeof(mp_resp)); + mp_resp.len_param = sizeof(*resp); + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + memcpy(resp, req, sizeof(*req)); + resp->result = result; + + ret = rte_mp_reply(&mp_resp, peer); + if (ret != 0) + RTE_LOG(ERR, EAL, "failed to send response to secondary\n"); + + return ret; +} + +static void +__handle_secondary_request(void *param) +{ + struct mp_reply_bundle *bundle = param; + const struct rte_mp_msg *msg = &bundle->msg; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct eal_dev_mp_req tmp_req; + struct rte_devargs *da; + struct rte_device *dev; + struct rte_bus *bus; + int ret = 0; + + tmp_req = *req; + + if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { + ret = local_dev_probe(req->devargs, &dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n"); + if (ret != -EEXIST) + goto finish; + } + ret = eal_dev_hotplug_request_to_secondary(&tmp_req); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + if (tmp_req.result != 0) { + ret = tmp_req.result; + RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n"); + if (ret != -EEXIST) + goto rollback; + } + } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) { + da = calloc(1, sizeof(*da)); + if (da == NULL) { + ret = -ENOMEM; + goto finish; + } + + ret = rte_devargs_parse(da, req->devargs); + if (ret != 0) + goto finish; + + ret = eal_dev_hotplug_request_to_secondary(&tmp_req); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n"); + ret = -ENOMSG; + goto rollback; + } + + bus = rte_bus_find_by_name(da->bus->name); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name); + ret = -ENOENT; + goto finish; + } + + dev = bus->find_device(NULL, cmp_dev_name, da->name); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name); + ret = -ENOENT; + goto finish; + } + + if (tmp_req.result != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n"); + ret = tmp_req.result; + if (ret != -ENOENT) + goto rollback; + } + + ret = local_dev_remove(dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n"); + if (ret != -ENOENT) + goto rollback; + } + } else { + RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n"); + ret = -ENOTSUP; + } + goto finish; + +rollback: + if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { + tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; + eal_dev_hotplug_request_to_secondary(&tmp_req); + local_dev_remove(dev); + } else { + tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK; + eal_dev_hotplug_request_to_secondary(&tmp_req); + } + +finish: + ret = send_response_to_secondary(&tmp_req, ret, bundle->peer); + if (ret) + RTE_LOG(ERR, EAL, "failed to send response to secondary\n"); + + free(bundle->peer); + free(bundle); +} + +static int +handle_secondary_request(const struct rte_mp_msg *msg, const void *peer) +{ + struct mp_reply_bundle *bundle; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + int ret = 0; + + bundle = malloc(sizeof(*bundle)); + if (bundle == NULL) { + RTE_LOG(ERR, EAL, "not enough memory\n"); + return 
send_response_to_secondary(req, -ENOMEM, peer); + } + + bundle->msg = *msg; + /** + * We need to send reply on interrupt thread, but peer can't be + * parsed directly, so this is a temporal hack, need to be fixed + * when it is ready. + */ + bundle->peer = strdup(peer); + + /** + * We are at IPC callback thread, sync IPC is not allowed due to + * dead lock, so we delegate the task to interrupt thread. + */ + ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle); + if (ret != 0) { + RTE_LOG(ERR, EAL, "failed to add mp task\n"); + return send_response_to_secondary(req, ret, peer); + } + return 0; +} + +static void __handle_primary_request(void *param) +{ + struct mp_reply_bundle *bundle = param; + struct rte_mp_msg *msg = &bundle->msg; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct rte_mp_msg mp_resp; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + struct rte_devargs *da; + struct rte_device *dev; + struct rte_bus *bus; + int ret = 0; + + memset(&mp_resp, 0, sizeof(mp_resp)); + + switch (req->t) { + case EAL_DEV_REQ_TYPE_ATTACH: + case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK: + ret = local_dev_probe(req->devargs, &dev); + break; + case EAL_DEV_REQ_TYPE_DETACH: + case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK: + da = calloc(1, sizeof(*da)); + if (da == NULL) { + ret = -ENOMEM; + goto quit; + } + + ret = rte_devargs_parse(da, req->devargs); + if (ret != 0) + goto quit; + + bus = rte_bus_find_by_name(da->bus->name); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name); + ret = -ENOENT; + goto quit; + } + + dev = bus->find_device(NULL, cmp_dev_name, da->name); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name); + ret = -ENOENT; + goto quit; + } + + ret = local_dev_remove(dev); +quit: + break; + default: + ret = -EINVAL; + } + + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + mp_resp.len_param = sizeof(*req); + memcpy(resp, req, sizeof(*resp)); + resp->result = ret; + if (rte_mp_reply(&mp_resp, bundle->peer) < 0) + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + + free(bundle->peer); + free(bundle); +} + +static int +handle_primary_request(const struct rte_mp_msg *msg, const void *peer) +{ + struct rte_mp_msg mp_resp; + const struct eal_dev_mp_req *req = + (const struct eal_dev_mp_req *)msg->param; + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_resp.param; + struct mp_reply_bundle *bundle; + int ret = 0; + + memset(&mp_resp, 0, sizeof(mp_resp)); + strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name)); + mp_resp.len_param = sizeof(*req); + memcpy(resp, req, sizeof(*resp)); + + bundle = calloc(1, sizeof(*bundle)); + if (bundle == NULL) { + resp->result = -ENOMEM; + ret = rte_mp_reply(&mp_resp, peer); + if (ret) + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + return ret; + } + + bundle->msg = *msg; + /** + * We need to send reply on interrupt thread, but peer can't be + * parsed directly, so this is a temporal hack, need to be fixed + * when it is ready. + */ + bundle->peer = (void *)strdup(peer); + + /** + * We are at IPC callback thread, sync IPC is not allowed due to + * dead lock, so we delegate the task to interrupt thread. 
+ */ + ret = rte_eal_alarm_set(1, __handle_primary_request, bundle); + if (ret != 0) { + resp->result = ret; + ret = rte_mp_reply(&mp_resp, peer); + if (ret != 0) { + RTE_LOG(ERR, EAL, "failed to send reply to primary request\n"); + return ret; + } + } + return 0; +} + +int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req) +{ + struct rte_mp_msg mp_req; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0}; + struct eal_dev_mp_req *resp; + int ret; + + memset(&mp_req, 0, sizeof(mp_req)); + memcpy(mp_req.param, req, sizeof(*req)); + mp_req.len_param = sizeof(*req); + strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name)); + + ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts); + if (ret || mp_reply.nb_received != 1) { + RTE_LOG(ERR, EAL, "cannot send request to primary"); + if (!ret) + return -1; + return ret; + } + + resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param; + req->result = resp->result; + + return ret; +} + +int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) +{ + struct rte_mp_msg mp_req; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0}; + int ret; + int i; + + memset(&mp_req, 0, sizeof(mp_req)); + memcpy(mp_req.param, req, sizeof(*req)); + mp_req.len_param = sizeof(*req); + strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name)); + + ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts); + if (ret != 0) { + RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n"); + return ret; + } + + if (mp_reply.nb_sent != mp_reply.nb_received) { + RTE_LOG(ERR, EAL, "not all secondary reply\n"); + return -1; + } + + req->result = 0; + for (i = 0; i < mp_reply.nb_received; i++) { + struct eal_dev_mp_req *resp = + (struct eal_dev_mp_req *)mp_reply.msgs[i].param; + if (resp->result != 0) { + req->result = resp->result; + if (req->t == EAL_DEV_REQ_TYPE_ATTACH && + req->result != -EEXIST) + break; + if (req->t == EAL_DEV_REQ_TYPE_DETACH && + req->result != -ENOENT) + break; + } + } + + return 0; +} + +int rte_mp_dev_hotplug_init(void) +{ + int ret; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST, + handle_secondary_request); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + EAL_DEV_MP_ACTION_REQUEST); + return ret; + } + } else { + ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST, + handle_primary_request); + if (ret != 0) { + RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n", + EAL_DEV_MP_ACTION_REQUEST); + return ret; + } + } + + return 0; +} diff --git a/lib/librte_eal/common/hotplug_mp.h b/lib/librte_eal/common/hotplug_mp.h new file mode 100644 index 00000000..597fde3d --- /dev/null +++ b/lib/librte_eal/common/hotplug_mp.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _HOTPLUG_MP_H_ +#define _HOTPLUG_MP_H_ + +#include "rte_dev.h" +#include "rte_bus.h" + +#define EAL_DEV_MP_ACTION_REQUEST "eal_dev_mp_request" +#define EAL_DEV_MP_ACTION_RESPONSE "eal_dev_mp_response" + +#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN +#define EAL_DEV_MP_BUS_NAME_MAX_LEN 32 +#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128 + +enum eal_dev_req_type { + EAL_DEV_REQ_TYPE_ATTACH, + EAL_DEV_REQ_TYPE_DETACH, + EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK, + EAL_DEV_REQ_TYPE_DETACH_ROLLBACK, +}; + +struct eal_dev_mp_req { + enum eal_dev_req_type t; + char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN]; + int result; +}; + +/** + * This is a 
synchronous wrapper for secondary process send + * request to primary process, this is invoked when an attach + * or detach request is issued from primary process. + */ +int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req); + +/** + * this is a synchronous wrapper for primary process send + * request to secondary process, this is invoked when an attach + * or detach request issued from secondary process. + */ +int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req); + + +#endif /* _HOTPLUG_MP_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h index c4f974fe..859b0974 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h @@ -29,8 +29,8 @@ extern "C" { #ifndef RTE_ARM_EAL_RDTSC_USE_PMU /** - * This call is easily portable to any ARM architecture, however, - * it may be damn slow and inprecise for some tasks. + * This call is easily portable to any architecture, however, + * it may require a system call and inprecise for some tasks. */ static inline uint64_t __rte_rdtsc_syscall(void) diff --git a/lib/librte_eal/common/include/arch/ppc_64/meson.build b/lib/librte_eal/common/include/arch/ppc_64/meson.build new file mode 100644 index 00000000..00f96117 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/meson.build @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +install_headers( + 'rte_atomic.h', + 'rte_byteorder.h', + 'rte_cpuflags.h', + 'rte_cycles.h', + 'rte_io.h', + 'rte_memcpy.h', + 'rte_pause.h', + 'rte_prefetch.h', + 'rte_rwlock.h', + 'rte_spinlock.h', + 'rte_vect.h', + subdir: get_option('include_subdir_arch')) diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h index 8bd83576..16e47ce2 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h @@ -9,10 +9,17 @@ extern "C" { #endif +#include "rte_atomic.h" + #include "generic/rte_pause.h" static inline void rte_pause(void) { + /* Set hardware multi-threading low priority */ + asm volatile("or 1,1,1"); + /* Set hardware multi-threading medium priority */ + asm volatile("or 2,2,2"); + rte_compiler_barrier(); } #ifdef __cplusplus diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h index 0ff1af50..ac379e87 100644 --- a/lib/librte_eal/common/include/generic/rte_cycles.h +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -13,6 +13,7 @@ */ #include <stdint.h> +#include <rte_compat.h> #include <rte_debug.h> #include <rte_atomic.h> @@ -158,6 +159,16 @@ rte_delay_ms(unsigned ms) void rte_delay_us_block(unsigned int us); /** + * Delay function that uses system sleep. + * Does not block the CPU core. + * + * @param us + * Number of microseconds to wait. + */ +void __rte_experimental +rte_delay_us_sleep(unsigned int us); + +/** * Replace rte_delay_us with user defined function. 
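One intended use of the sleep-based delay is to install it as the rte_delay_us() backend through rte_delay_us_callback_register(), the pre-existing hook whose description continues below, so that control-path waits stop spinning the core; a minimal sketch:

#include <rte_cycles.h>

/* Call once after rte_eal_init(): every subsequent rte_delay_us() in this
 * process yields the CPU via nanosleep() instead of busy-polling.
 */
static void
use_sleep_based_delays(void)
{
	rte_delay_us_callback_register(rte_delay_us_sleep);

	rte_delay_us(100);	/* now sleeps roughly 100us rather than spinning */
}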
* * @param userfunc diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h index d9facc64..7a36ce73 100644 --- a/lib/librte_eal/common/include/rte_bitmap.h +++ b/lib/librte_eal/common/include/rte_bitmap.h @@ -88,7 +88,7 @@ __rte_bitmap_index1_inc(struct rte_bitmap *bmp) static inline uint64_t __rte_bitmap_mask1_get(struct rte_bitmap *bmp) { - return (~1lu) << bmp->offset1; + return (~1llu) << bmp->offset1; } static inline void @@ -317,7 +317,7 @@ rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos) index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; slab2 = bmp->array2 + index2; - return (*slab2) & (1lu << offset2); + return (*slab2) & (1llu << offset2); } /** @@ -342,8 +342,8 @@ rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos) slab2 = bmp->array2 + index2; slab1 = bmp->array1 + index1; - *slab2 |= 1lu << offset2; - *slab1 |= 1lu << offset1; + *slab2 |= 1llu << offset2; + *slab1 |= 1llu << offset1; } /** @@ -370,7 +370,7 @@ rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab) slab1 = bmp->array1 + index1; *slab2 |= slab; - *slab1 |= 1lu << offset1; + *slab1 |= 1llu << offset1; } static inline uint64_t @@ -408,7 +408,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) slab2 = bmp->array2 + index2; /* Return if array2 slab is not all-zeros */ - *slab2 &= ~(1lu << offset2); + *slab2 &= ~(1llu << offset2); if (*slab2){ return; } @@ -424,7 +424,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; slab1 = bmp->array1 + index1; - *slab1 &= ~(1lu << offset1); + *slab1 &= ~(1llu << offset1); return; } diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index b7b5b084..6be4b5ca 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -168,6 +168,35 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev); typedef int (*rte_bus_parse_t)(const char *name, void *addr); /** + * Implement a specific hot-unplug handler, which is responsible for + * handle the failure when device be hot-unplugged. When the event of + * hot-unplug be detected, it could call this function to handle + * the hot-unplug failure and avoid app crash. + * @param dev + * Pointer of the device structure. + * + * @return + * 0 on success. + * !0 on error. + */ +typedef int (*rte_bus_hot_unplug_handler_t)(struct rte_device *dev); + +/** + * Implement a specific sigbus handler, which is responsible for handling + * the sigbus error which is either original memory error, or specific memory + * error that caused of device be hot-unplugged. When sigbus error be captured, + * it could call this function to handle sigbus error. + * @param failure_addr + * Pointer of the fault address of the sigbus error. + * + * @return + * 0 for success handle the sigbus for hot-unplug. + * 1 for not process it, because it is a generic sigbus error. + * -1 for failed to handle the sigbus for hot-unplug. + */ +typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr); + +/** * Bus scan policies */ enum rte_bus_scan_mode { @@ -212,6 +241,11 @@ struct rte_bus { struct rte_bus_conf conf; /**< Bus configuration */ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ rte_dev_iterate_t dev_iterate; /**< Device iterator. 
*/ + rte_bus_hot_unplug_handler_t hot_unplug_handler; + /**< handle hot-unplug failure on the bus */ + rte_bus_sigbus_handler_t sigbus_handler; + /**< handle sigbus error on the bus */ + }; /** diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 069c13ec..cba7bbc1 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -68,6 +68,11 @@ typedef uint16_t unaligned_uint16_t; /******* Macro to mark functions and fields scheduled for removal *****/ #define __rte_deprecated __attribute__((__deprecated__)) +/** + * Mark a function or variable to a weak reference. + */ +#define __rte_weak __attribute__((__weak__)) + /*********** Macros to eliminate unused variable warnings ********/ /** @@ -164,6 +169,12 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void) */ #define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2)) +/** + * Workaround to cast a const field of a structure to non-const type. + */ +#define RTE_CAST_FIELD(var, field, type) \ + (*(type *)((uintptr_t)(var) + offsetof(typeof(*(var)), field))) + /*********** Macros/static functions for doing alignment ********/ diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index b80a8059..cd6c187c 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -39,7 +39,7 @@ struct rte_dev_event { char *devname; /**< device name */ }; -typedef void (*rte_dev_event_cb_fn)(char *device_name, +typedef void (*rte_dev_event_cb_fn)(const char *device_name, enum rte_dev_event_type event, void *cb_arg); @@ -156,63 +156,67 @@ struct rte_driver { struct rte_device { TAILQ_ENTRY(rte_device) next; /**< Next device */ const char *name; /**< Device name */ - const struct rte_driver *driver;/**< Associated driver */ + const struct rte_driver *driver; /**< Driver assigned after probing */ + const struct rte_bus *bus; /**< Bus handle assigned on scan */ int numa_node; /**< NUMA node connection */ - struct rte_devargs *devargs; /**< Device user arguments */ + struct rte_devargs *devargs; /**< Arguments for latest probing */ }; /** - * Attach a device to a registered driver. + * @warning + * @b EXPERIMENTAL: this API may change without prior notice * - * @param name - * The device name, that refers to a pci device (or some private - * way of designating a vdev device). Based on this device name, eal - * will identify a driver capable of handling it and pass it to the - * driver probing function. - * @param devargs - * Device arguments to be passed to the driver. - * @return - * 0 on success, negative on error. - */ -__rte_deprecated -int rte_eal_dev_attach(const char *name, const char *devargs); - -/** - * Detach a device from its driver. + * Query status of a device. * * @param dev - * A pointer to a rte_device structure. + * Generic device pointer. * @return - * 0 on success, negative on error. + * (int)true if already probed successfully, 0 otherwise. */ -__rte_deprecated -int rte_eal_dev_detach(struct rte_device *dev); +__rte_experimental +int rte_dev_is_probed(const struct rte_device *dev); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Hotplug add a given device to a specific bus. * + * In multi-process, it will request other processes to add the same device. + * A failure, in any process, will rollback the action + * * @param busname * The bus name the device is added to. 
* @param devname * The device name. Based on this device name, eal will identify a driver * capable of handling it and pass it to the driver probing function. - * @param devargs + * @param drvargs * Device arguments to be passed to the driver. * @return * 0 on success, negative on error. */ -int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname, - const char *devargs); +int rte_eal_hotplug_add(const char *busname, const char *devname, + const char *drvargs); /** * @warning * @b EXPERIMENTAL: this API may change without prior notice * + * Add matching devices. + * + * In multi-process, it will request other processes to add the same device. + * A failure, in any process, will rollback the action + * + * @param devargs + * Device arguments including bus, class and driver properties. + * @return + * 0 on success, negative on error. + */ +int __rte_experimental rte_dev_probe(const char *devargs); + +/** * Hotplug remove a given device from a specific bus. * + * In multi-process, it will request other processes to remove the same device. + * A failure, in any process, will rollback the action + * * @param busname * The bus name the device is removed from. * @param devname @@ -220,8 +224,23 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn * @return * 0 on success, negative on error. */ -int __rte_experimental rte_eal_hotplug_remove(const char *busname, - const char *devname); +int rte_eal_hotplug_remove(const char *busname, const char *devname); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Remove one device. + * + * In multi-process, it will request other processes to remove the same device. + * A failure, in any process, will rollback the action + * + * @param dev + * Data structure of the device to remove. + * @return + * 0 on success, negative on error. + */ +int __rte_experimental rte_dev_remove(struct rte_device *dev); /** * Device comparison function. @@ -438,6 +457,22 @@ rte_dev_event_callback_unregister(const char *device_name, * @warning * @b EXPERIMENTAL: this API may change without prior notice * + * Executes all the user application registered callbacks for + * the specific device. + * + * @param device_name + * The device name. + * @param event + * the device event type. + */ +void __rte_experimental +rte_dev_event_callback_process(const char *device_name, + enum rte_dev_event_type event); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * * Start the device event monitoring. * * @return @@ -460,4 +495,30 @@ rte_dev_event_monitor_start(void); int __rte_experimental rte_dev_event_monitor_stop(void); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Enable hotplug handling for devices. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int __rte_experimental +rte_dev_hotplug_handle_enable(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Disable hotplug handling for devices. + * + * @return + * - On success, zero. + * - On failure, a negative value. 
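The monitoring and handling entry points above are meant to be switched on early in the application; a sketch follows. The callback body and the use of a NULL device name to receive events for all devices are illustrative, and rte_dev_event_callback_register() is the pre-existing registration call whose full prototype is not part of this hunk:

#include <rte_common.h>
#include <rte_dev.h>
#include <rte_log.h>

static void
on_dev_event(const char *device_name, enum rte_dev_event_type event,
	void *cb_arg __rte_unused)
{
	if (event == RTE_DEV_EVENT_REMOVE)
		RTE_LOG(WARNING, USER1, "device %s removed\n", device_name);
	else if (event == RTE_DEV_EVENT_ADD)
		RTE_LOG(INFO, USER1, "device %s added\n", device_name);
}

static int
enable_hotplug_events(void)
{
	int ret;

	ret = rte_dev_hotplug_handle_enable();	/* sigbus-based removal handling */
	if (ret < 0)
		return ret;
	ret = rte_dev_event_monitor_start();	/* uevent monitor thread */
	if (ret < 0)
		return ret;
	/* NULL device name: get callbacks for every device. */
	return rte_dev_event_callback_register(NULL, on_dev_event, NULL);
}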
+ */ +int __rte_experimental +rte_dev_hotplug_handle_disable(void); + #endif /* _RTE_DEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index 097a4ce7..b1f121f8 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -67,36 +67,6 @@ struct rte_devargs { }; /** - * @deprecated - * Parse a devargs string. - * - * For PCI devices, the format of arguments string is "PCI_ADDR" or - * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", - * "04:00.0,arg=val". - * - * For virtual devices, the format of arguments string is "DRIVER_NAME*" - * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring", - * "net_ring0", "net_pmdAnything,arg=0:arg2=1". - * - * The function parses the arguments string to get driver name and driver - * arguments. - * - * @param devargs_str - * The arguments as given by the user. - * @param drvname - * The pointer to the string to store parsed driver name. - * @param drvargs - * The pointer to the string to store parsed driver arguments. - * - * @return - * - 0 on success - * - A negative value on error - */ -__rte_deprecated -int rte_eal_parse_devargs_str(const char *devargs_str, - char **drvname, char **drvargs); - -/** * Parse a device string. * * Verify that a bus is capable of handling the device passed @@ -202,32 +172,12 @@ __rte_experimental int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str); /** - * @deprecated - * Add a device to the user device list - * See rte_devargs_parse() for details. - * - * @param devtype - * The type of the device. - * @param devargs_str - * The arguments as given by the user. - * - * @return - * - 0 on success - * - A negative value on error - */ -__rte_deprecated -int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); - -/** * Remove a device from the user device list. * Its resources are freed. * If the devargs cannot be found, nothing happens. * - * @param busname - * bus name of the devargs to remove. - * - * @param devname - * device name of the devargs to remove. + * @param devargs + * The instance or a copy of devargs to remove. * * @return * 0 on success. @@ -235,8 +185,7 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); * >0 if the devargs was not within the user device list. */ __rte_experimental -int rte_devargs_remove(const char *busname, - const char *devname); +int rte_devargs_remove(struct rte_devargs *devargs); /** * Count the number of user devices of a specified type @@ -252,20 +201,6 @@ unsigned int rte_devargs_type_count(enum rte_devtype devtype); /** - * @deprecated - * Count the number of user devices of a specified type - * - * @param devtype - * The type of the devices to counted. - * - * @return - * The number of devices. - */ -__rte_deprecated -unsigned int -rte_eal_devargs_type_count(enum rte_devtype devtype); - -/** * This function dumps the list of user device and their arguments. * * @param f @@ -275,16 +210,6 @@ __rte_experimental void rte_devargs_dump(FILE *f); /** - * @deprecated - * This function dumps the list of user device and their arguments. - * - * @param f - * A pointer to a file for output - */ -__rte_deprecated -void rte_eal_devargs_dump(FILE *f); - -/** * Find next rte_devargs matching the provided bus name. 
* * @param busname diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index e114dcbd..a0cedd57 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -316,7 +316,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg); * * @param reply * The reply argument will be for storing all the replied messages; - * the caller is responsible for free reply->replies. + * the caller is responsible for free reply->msgs. * * @param ts * The ts argument specifies how long we can wait for the peer(s) to reply. @@ -378,6 +378,15 @@ int __rte_experimental rte_mp_reply(struct rte_mp_msg *msg, const char *peer); /** + * Register all mp action callbacks for hotplug. + * + * @return + * 0 on success, negative on error. + */ +int __rte_experimental +rte_mp_dev_hotplug_init(void); + +/** * Usage function typedef used by the application usage function. * * Use this function typedef to define and call rte_set_application_usage_hook() @@ -498,6 +507,15 @@ enum rte_iova_mode rte_eal_iova_mode(void); const char * rte_eal_mbuf_user_pool_ops(void); +/** + * Get the runtime directory of DPDK + * + * @return + * The runtime directory path of DPDK + */ +const char * +rte_eal_get_runtime_dir(void); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h index 6eb49327..9d302f41 100644 --- a/lib/librte_eal/common/include/rte_eal_interrupts.h +++ b/lib/librte_eal/common/include/rte_eal_interrupts.h @@ -35,6 +35,7 @@ enum rte_intr_handle_type { RTE_INTR_HANDLE_EXT, /**< external handler */ RTE_INTR_HANDLE_VDEV, /**< virtual device */ RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */ + RTE_INTR_HANDLE_VFIO_REQ, /**< VFIO request handle */ RTE_INTR_HANDLE_MAX /**< count of elements */ }; diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index aff0688d..84aabe36 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -30,9 +30,11 @@ struct rte_memseg_list { uint64_t addr_64; /**< Makes sure addr is always 64-bits */ }; - int socket_id; /**< Socket ID for all memsegs in this list. */ uint64_t page_sz; /**< Page size for all memsegs in this list. */ + int socket_id; /**< Socket ID for all memsegs in this list. */ volatile uint32_t version; /**< version number for multiprocess sync. */ + size_t len; /**< Length of memory area covered by this memseg list. */ + unsigned int external; /**< 1 if this list points to external memory */ struct rte_fbarray memseg_arr; }; @@ -70,13 +72,23 @@ struct rte_mem_config { struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ - /* Heaps of Malloc per socket */ - struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES]; + /* Heaps of Malloc */ + struct malloc_heap malloc_heaps[RTE_MAX_HEAPS]; + + /* next socket ID for external malloc heap */ + int next_socket_id; /* address of mem_config in primary process. used to map shared config into * exact same address the primary process maps it. 
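Because the caller owns reply->msgs, a typical synchronous IPC exchange ends with a free() even when nothing was received; a rough sketch with an invented action name and an empty payload (peers that never registered the action simply do not answer):

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rte_eal.h>
#include <rte_string_fns.h>

static int
query_peers(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int i, ret;

	memset(&req, 0, sizeof(req));
	rte_strlcpy(req.name, "example_query", sizeof(req.name)); /* hypothetical action */

	ret = rte_mp_request_sync(&req, &reply, &ts);
	if (ret == 0) {
		for (i = 0; i < reply.nb_received; i++) {
			/* inspect reply.msgs[i].param ... */
		}
	}
	free(reply.msgs);	/* caller is responsible for reply->msgs; free(NULL) is fine */
	return ret;
}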
*/ uint64_t mem_cfg_addr; + + /* legacy mem and single file segments options are shared */ + uint32_t legacy_mem; + uint32_t single_file_segments; + + /* keeps the more restricted dma mask */ + uint8_t dma_maskbits; } __attribute__((__packed__)); diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index a9fb7e45..7249e6aa 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -264,6 +264,198 @@ rte_malloc_get_socket_stats(int socket, struct rte_malloc_socket_stats *socket_stats); /** + * Add memory chunk to a heap with specified name. + * + * @note Multiple memory chunks can be added to the same heap + * + * @note Before accessing this memory in other processes, it needs to be + * attached in each of those processes by calling + * ``rte_malloc_heap_memory_attach`` in each other process. + * + * @note Memory must be previously allocated for DPDK to be able to use it as a + * malloc heap. Failing to do so will result in undefined behavior, up to and + * including segmentation faults. + * + * @note Calling this function will erase any contents already present at the + * supplied memory address. + * + * @param heap_name + * Name of the heap to add memory chunk to + * @param va_addr + * Start of virtual area to add to the heap + * @param len + * Length of virtual area to add to the heap + * @param iova_addrs + * Array of page IOVA addresses corresponding to each page in this memory + * area. Can be NULL, in which case page IOVA addresses will be set to + * RTE_BAD_IOVA. + * @param n_pages + * Number of elements in the iova_addrs array. Ignored if ``iova_addrs`` + * is NULL. + * @param page_sz + * Page size of the underlying memory + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * EPERM - attempted to add memory to a reserved heap + * ENOSPC - no more space in internal config to store a new memory chunk + */ +int __rte_experimental +rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, + rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz); + +/** + * Remove memory chunk from heap with specified name. + * + * @note Memory chunk being removed must be the same as one that was added; + * partially removing memory chunks is not supported + * + * @note Memory area must not contain any allocated elements to allow its + * removal from the heap + * + * @note All other processes must detach from the memory chunk prior to it being + * removed from the heap. + * + * @param heap_name + * Name of the heap to remove memory from + * @param va_addr + * Virtual address to remove from the heap + * @param len + * Length of virtual area to remove from the heap + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - one of the parameters was invalid + * EPERM - attempted to remove memory from a reserved heap + * ENOENT - heap or memory chunk was not found + * EBUSY - memory chunk still contains data + */ +int __rte_experimental +rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len); + +/** + * Attach to an already existing chunk of external memory in another process. + * + * @note This function must be called before any attempt is made to use an + * already existing external memory chunk. 
This function does *not* need to + * be called if a call to ``rte_malloc_heap_memory_add`` was made in the + * current process. + * + * @param heap_name + * Heap name to which this chunk of memory belongs + * @param va_addr + * Start address of memory chunk to attach to + * @param len + * Length of memory chunk to attach to + * @return + * 0 on successful attach + * -1 on unsuccessful attach, with rte_errno set to indicate cause for error: + * EINVAL - one of the parameters was invalid + * EPERM - attempted to attach memory to a reserved heap + * ENOENT - heap or memory chunk was not found + */ +int __rte_experimental +rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len); + +/** + * Detach from a chunk of external memory in secondary process. + * + * @note This function must be called in before any attempt is made to remove + * external memory from the heap in another process. This function does *not* + * need to be called if a call to ``rte_malloc_heap_memory_remove`` will be + * called in current process. + * + * @param heap_name + * Heap name to which this chunk of memory belongs + * @param va_addr + * Start address of memory chunk to attach to + * @param len + * Length of memory chunk to attach to + * @return + * 0 on successful detach + * -1 on unsuccessful detach, with rte_errno set to indicate cause for error: + * EINVAL - one of the parameters was invalid + * EPERM - attempted to detach memory from a reserved heap + * ENOENT - heap or memory chunk was not found + */ +int __rte_experimental +rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len); + +/** + * Creates a new empty malloc heap with a specified name. + * + * @note Heaps created via this call will automatically get assigned a unique + * socket ID, which can be found using ``rte_malloc_heap_get_socket()`` + * + * @param heap_name + * Name of the heap to create. + * + * @return + * - 0 on successful creation + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - ``heap_name`` was NULL, empty or too long + * EEXIST - heap by name of ``heap_name`` already exists + * ENOSPC - no more space in internal config to store a new heap + */ +int __rte_experimental +rte_malloc_heap_create(const char *heap_name); + +/** + * Destroys a previously created malloc heap with specified name. + * + * @note This function will return a failure result if not all memory allocated + * from the heap has been freed back to the heap + * + * @note This function will return a failure result if not all memory segments + * were removed from the heap prior to its destruction + * + * @param heap_name + * Name of the heap to create. + * + * @return + * - 0 on success + * - -1 in case of error, with rte_errno set to one of the following: + * EINVAL - ``heap_name`` was NULL, empty or too long + * ENOENT - heap by the name of ``heap_name`` was not found + * EPERM - attempting to destroy reserved heap + * EBUSY - heap still contains data + */ +int __rte_experimental +rte_malloc_heap_destroy(const char *heap_name); + +/** + * Find socket ID corresponding to a named heap. 
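A compressed end-to-end sketch of the external heap API: create a named heap, hand it an anonymous mapping (page IOVA addresses left unknown, so the memory is only usable by the CPU), then allocate from it via the heap's synthetic socket ID. The heap name, sizes and the minimal error handling are assumptions, and rte_malloc_heap_get_socket(), used at the end, is the function whose description continues just below:

#include <stddef.h>
#include <sys/mman.h>
#include <rte_malloc.h>

#define EXT_HEAP_NAME	"example_heap"	/* arbitrary name */
#define EXT_HEAP_LEN	(2u << 20)	/* 2 MiB of 4K pages */

static void *
alloc_from_external_heap(size_t sz)
{
	void *va;
	int socket;

	if (rte_malloc_heap_create(EXT_HEAP_NAME) != 0)
		return NULL;

	va = mmap(NULL, EXT_HEAP_LEN, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (va == MAP_FAILED)
		return NULL;

	/* NULL iova_addrs: pages are tagged RTE_BAD_IOVA, CPU-only use */
	if (rte_malloc_heap_memory_add(EXT_HEAP_NAME, va, EXT_HEAP_LEN,
			NULL, 0, 4096) != 0)
		return NULL;

	socket = rte_malloc_heap_get_socket(EXT_HEAP_NAME);
	if (socket < 0)
		return NULL;

	return rte_malloc_socket(NULL, sz, 0, socket);
}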
+ * + * @param name + * Heap name to find socket ID for + * @return + * Socket ID in case of success (a non-negative number) + * -1 in case of error, with rte_errno set to one of the following: + * EINVAL - ``name`` was NULL + * ENOENT - heap identified by the name ``name`` was not found + */ +int __rte_experimental +rte_malloc_heap_get_socket(const char *name); + +/** + * Check if a given socket ID refers to externally allocated memory. + * + * @note Passing SOCKET_ID_ANY will return 0. + * + * @param socket_id + * Socket ID to check + * @return + * 1 if socket ID refers to externally allocated memory + * 0 if socket ID refers to internal DPDK memory + * -1 if socket ID is invalid + */ +int __rte_experimental +rte_malloc_heap_socket_is_external(int socket_id); + +/** * Dump statistics. * * Dump for the specified type to a file. If the type argument is diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h index d43fa909..4a7e0eb1 100644 --- a/lib/librte_eal/common/include/rte_malloc_heap.h +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -12,6 +12,7 @@ /* Number of free lists per heap, grouped by size. */ #define RTE_HEAP_NUM_FREELISTS 13 +#define RTE_HEAP_NAME_MAX_LEN 32 /* dummy definition, for pointers */ struct malloc_elem; @@ -26,7 +27,9 @@ struct malloc_heap { struct malloc_elem *volatile last; unsigned alloc_count; + unsigned int socket_id; size_t total_size; + char name[RTE_HEAP_NAME_MAX_LEN]; } __rte_cache_aligned; #endif /* _RTE_MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index c4b7f4cf..ce937058 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -215,6 +215,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl, * @note This function read-locks the memory hotplug subsystem, and thus cannot * be used within memory-related callback functions. * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * * @param func * Iterator function * @param arg @@ -233,6 +236,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg); * @note This function read-locks the memory hotplug subsystem, and thus cannot * be used within memory-related callback functions. * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * * @param func * Iterator function * @param arg @@ -251,6 +257,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg); * @note This function read-locks the memory hotplug subsystem, and thus cannot * be used within memory-related callback functions. * + * @note This function will also walk through externally allocated segments. It + * is up to the user to decide whether to skip through these segments. + * * @param func * Iterator function * @param arg @@ -318,6 +327,103 @@ int __rte_experimental rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg); /** + * Return file descriptor associated with a particular memseg (if available). + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @note This returns an internal file descriptor. 
Performing any operations on + * this file descriptor is inherently dangerous, so it should be treated + * as read-only for all intents and purposes. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd(const struct rte_memseg *ms); + +/** + * Return file descriptor associated with a particular memseg (if available). + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @note This returns an internal file descriptor. Performing any operations on + * this file descriptor is inherently dangerous, so it should be treated + * as read-only for all intents and purposes. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms); + +/** + * Get offset into segment file descriptor associated with a particular memseg + * (if available). + * + * @note This function read-locks the memory hotplug subsystem, and thus cannot + * be used within memory-related callback functions. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * @param offset + * A pointer to offset value where the result will be stored. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - EINVAL - ``offset`` pointer was NULL + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset); + +/** + * Get offset into segment file descriptor associated with a particular memseg + * (if available). + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param ms + * A pointer to memseg for which to get file descriptor. + * @param offset + * A pointer to offset value where the result will be stored. + * + * @return + * Valid file descriptor in case of success. + * -1 in case of error, with ``rte_errno`` set to the following values: + * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg + * - EINVAL - ``offset`` pointer was NULL + * - ENODEV - ``ms`` fd is not available + * - ENOENT - ``ms`` is an unused segment + * - ENOTSUP - segment fd's are not supported + */ +int __rte_experimental +rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, + size_t *offset); + +/** * Dump the physical memory layout to a file. 
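The thread-unsafe variants above are intended for use inside walk callbacks, which already run under the hotplug read lock taken by the walk itself; below is a sketch that merely reports each segment's backing fd and offset (the callback name and output format are illustrative):

#include <stdio.h>
#include <rte_common.h>
#include <rte_memory.h>

static int
dump_seg_fd(const struct rte_memseg_list *msl __rte_unused,
	const struct rte_memseg *ms, void *arg __rte_unused)
{
	size_t offset;
	int fd;

	fd = rte_memseg_get_fd_thread_unsafe(ms);
	if (fd < 0)
		return 0;	/* e.g. ENOTSUP for anonymous memory: skip */

	if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0)
		return 0;

	printf("seg va=%p fd=%d offset=%zu\n", ms->addr, fd, offset);
	return 0;
}

static void
dump_all_seg_fds(void)
{
	/* rte_memseg_walk() takes the read lock once for the whole walk */
	rte_memseg_walk(dump_seg_fd, NULL);
}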
* * @note This function read-locks the memory hotplug subsystem, and thus cannot @@ -357,6 +463,9 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); +/* check memsegs iovas are within a range based on dma mask */ +int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits); + /** * Drivers based on uio will not load unless physical * addresses are obtainable. It is only possible to get diff --git a/lib/librte_eal/common/include/rte_option.h b/lib/librte_eal/common/include/rte_option.h new file mode 100644 index 00000000..8957b970 --- /dev/null +++ b/lib/librte_eal/common/include/rte_option.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. + */ + +#ifndef __INCLUDE_RTE_OPTION_H__ +#define __INCLUDE_RTE_OPTION_H__ + +/** + * @file + * + * This API offers the ability to register options to the EAL command line and + * map those options to functions that will be executed at the end of EAL + * initialization. These options will be available as part of the EAL command + * line of applications and are dynamically managed. + * + * This is used primarily by DPDK libraries offering command line options. + * Currently, this API is limited to registering options without argument. + * + * The register API can be used to resolve circular dependency issues + * between EAL and the library. The library uses EAL, but is also initialized + * by EAL. Hence, EAL depends on the init function of the library. The API + * introduced in rte_option allows us to register the library init with EAL + * (passing a function pointer) and avoid the circular dependency. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*rte_option_cb)(void); + +/* + * Structure describing the EAL command line option being registered. + */ +struct rte_option { + TAILQ_ENTRY(rte_option) next; /**< Next entry in the list. */ + char *opt_str; /**< The option name. */ + rte_option_cb cb; /**< Function called when option is used. */ + int enabled; /**< Set when the option is used. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register an option to the EAL command line. + * When recognized, the associated function will be executed at the end of EAL + * initialization. + * + * The associated structure must be available the whole time this option is + * registered (i.e. not stack memory). + * + * @param opt + * Structure describing the option to parse. + */ +void __rte_experimental +rte_option_register(struct rte_option *opt); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h index 97597a14..9a2a1ff9 100644 --- a/lib/librte_eal/common/include/rte_string_fns.h +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -16,6 +16,7 @@ extern "C" { #endif #include <stdio.h> +#include <string.h> /** * Takes string "string" parameter and splits it at character "delim" @@ -60,12 +61,10 @@ rte_strlcpy(char *dst, const char *src, size_t size) /* pull in a strlcpy function */ #ifdef RTE_EXEC_ENV_BSDAPP -#include <string.h> #ifndef __BSD_VISIBLE /* non-standard functions are hidden */ #define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) #endif - #else /* non-BSD platforms */ #ifdef RTE_USE_LIBBSD #include <bsd/string.h> @@ -76,6 +75,29 @@ rte_strlcpy(char *dst, const char *src, size_t size) #endif /* RTE_USE_LIBBSD */ #endif /* BSDAPP */ +/** + * Copy string src to buffer dst of size dsize. 
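Tying the pieces of rte_option.h together: a library keeps one statically allocated struct rte_option alive for the whole process and registers it from a constructor, before rte_eal_init() parses the command line. The option string and callback below are invented, and spelling the option with its leading dashes mirrors the in-tree user of this API, so treat that detail as an assumption:

#include <sys/queue.h>		/* TAILQ_ENTRY used by struct rte_option */
#include <rte_common.h>
#include <rte_option.h>

static int
example_feature_init(void)
{
	/* runs at the end of rte_eal_init() if --example-feature was given */
	return 0;
}

/* Must not live on the stack: EAL keeps a pointer to this structure. */
static struct rte_option example_opt = {
	.opt_str = "--example-feature",
	.cb = example_feature_init,
	.enabled = 0,
};

RTE_INIT(example_opt_register)
{
	rte_option_register(&example_opt);
}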
+ * At most dsize-1 chars will be copied. + * Always NUL-terminates, unless (dsize == 0). + * Returns number of bytes copied (terminating NUL-byte excluded) on success; + * negative errno on error. + * + * @param dst + * The destination string. + * + * @param src + * The input string to be copied. + * + * @param dsize + * Length in bytes of the destination buffer. + * + * @return + * The number of bytes copied on success + * -E2BIG if the destination buffer is too small. + */ +ssize_t +rte_strscpy(char *dst, const char *src, size_t dsize); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 7c6714a2..412ed2db 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -32,7 +32,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 8 +#define RTE_VER_MONTH 11 /** * Patch level number i.e. the z in yy.mm.z @@ -42,14 +42,14 @@ extern "C" { /** * Extra string to be appended to version number */ -#define RTE_VER_SUFFIX "" +#define RTE_VER_SUFFIX "-rc" /** * Patch release number * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 16 +#define RTE_VER_RELEASE 1 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h index 5ca13fcc..cae96fab 100644 --- a/lib/librte_eal/common/include/rte_vfio.h +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -14,6 +14,8 @@ extern "C" { #endif +#include <stdint.h> + /* * determine if VFIO is present on the system */ @@ -22,6 +24,9 @@ extern "C" { #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) #define VFIO_PRESENT #endif /* kernel version >= 3.6.0 */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) +#define HAVE_VFIO_DEV_REQ_INTERFACE +#endif /* kernel version >= 4.0.0 */ #endif /* RTE_EAL_VFIO */ #ifdef VFIO_PRESENT @@ -44,6 +49,30 @@ extern "C" { #define RTE_VFIO_NOIOMMU 8 #endif +/* + * capabilities are only supported on kernel 4.6+. there were also some API + * changes, so add a macro to get cap offset. + */ +#ifdef VFIO_REGION_INFO_FLAG_CAPS +#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS +#define VFIO_CAP_OFFSET(x) (x->cap_offset) +#else +#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3) +#define VFIO_CAP_OFFSET(x) (x->resv) +struct vfio_info_cap_header { + uint16_t id; + uint16_t version; + uint32_t next; +}; +#endif + +/* kernels 4.16+ can map BAR containing MSI-X table */ +#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE +#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE +#else +#define RTE_VFIO_CAP_MSIX_MAPPABLE 3 +#endif + #else /* not VFIO_PRESENT */ /* we don't need an actual definition, only pointer is used */ @@ -227,7 +256,7 @@ rte_vfio_get_group_num(const char *sysfs_base, const char *dev_addr, int *iommu_group_num); /** - * Open VFIO container fd or get an existing one * * This function is only relevant to linux and will return * an error on BSD.
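A short sketch of the rte_strscpy() semantics documented in rte_string_fns.h above; the buffer size and the printed messages are made up for illustration.

#include <stdio.h>
#include <rte_string_fns.h>

static void
copy_example(const char *src)
{
        char buf[8];
        ssize_t n = rte_strscpy(buf, src, sizeof(buf));

        if (n < 0) /* -E2BIG: truncated, but buf is still NUL-terminated */
                printf("truncated copy: \"%s\"\n", buf);
        else       /* n bytes copied, terminating NUL excluded */
                printf("copied %zd bytes: \"%s\"\n", n, buf);
}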
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index e0a8ed15..1a74660d 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -39,10 +39,14 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align); /* if we're in IOVA as VA mode, or if we're in legacy mode with - * hugepages, all elements are IOVA-contiguous. + * hugepages, all elements are IOVA-contiguous. however, we can only + * make these assumptions about internal memory - externally allocated + * segments have to be checked. */ - if (rte_eal_iova_mode() == RTE_IOVA_VA || - (internal_config.legacy_mem && rte_eal_has_hugepages())) + if (!elem->msl->external && + (rte_eal_iova_mode() == RTE_IOVA_VA || + (internal_config.legacy_mem && + rte_eal_has_hugepages()))) return RTE_PTR_DIFF(data_end, contig_seg_start); cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz); diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 12aaf2d7..1973b6e6 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -29,6 +29,10 @@ #include "malloc_heap.h" #include "malloc_mp.h" +/* start external socket ID's at a very high number */ +#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */ +#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES)) + static unsigned check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) { @@ -66,6 +70,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) return check_flag & flags; } +int +malloc_socket_to_heap_id(unsigned int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i; + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + + if (heap->socket_id == socket_id) + return i; + } + return -1; +} + /* * Expand the heap with a memory area. 
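The CONST_MAX workaround defined at the top of malloc_heap.c above is needed because RTE_MAX expands to a GCC statement expression, which is not an integer constant expression; a small illustration (the enum name and the second operand are hypothetical):

/* With the CONST_MAX() ternary above, an integer constant expression results: */
enum { EXAMPLE_MIN_ID = CONST_MAX(1 << 8, 64) };   /* OK, evaluates to 256 */

/* RTE_MAX(), by contrast, expands to a ({ ... }) statement expression with
 * local temporaries, so the equivalent
 *     enum { BROKEN = RTE_MAX(1 << 8, 64) };
 * would not compile, which is why the heap code defines CONST_MAX. */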
*/ @@ -93,9 +112,17 @@ malloc_add_seg(const struct rte_memseg_list *msl, struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct rte_memseg_list *found_msl; struct malloc_heap *heap; - int msl_idx; + int msl_idx, heap_idx; + + if (msl->external) + return 0; - heap = &mcfg->malloc_heaps[msl->socket_id]; + heap_idx = malloc_socket_to_heap_id(msl->socket_id); + if (heap_idx < 0) { + RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n"); + return -1; + } + heap = &mcfg->malloc_heaps[heap_idx]; /* msl is const, so find it */ msl_idx = msl - mcfg->memsegs; @@ -165,7 +192,9 @@ find_biggest_element(struct malloc_heap *heap, size_t *size, for (elem = LIST_FIRST(&heap->free_head[idx]); !!elem; elem = LIST_NEXT(elem, free_list)) { size_t cur_size; - if (!check_hugepage_sz(flags, elem->msl->page_sz)) + if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 && + !check_hugepage_sz(flags, + elem->msl->page_sz)) continue; if (contig) { cur_size = @@ -259,11 +288,13 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, int socket, unsigned int flags, size_t align, size_t bound, bool contig, struct rte_memseg **ms, int n_segs) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct rte_memseg_list *msl; struct malloc_elem *elem = NULL; size_t alloc_sz; int allocd_pages; void *ret, *map_addr; alloc_sz = (size_t)pg_sz * n_segs; @@ -291,6 +322,16 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, goto fail; } + if (mcfg->dma_maskbits) { + if (rte_eal_check_dma_mask(mcfg->dma_maskbits)) { + RTE_LOG(ERR, EAL, + "%s(): couldn't allocate memory due to DMA mask\n", + __func__); + goto fail; + } + } + /* add newly minted memsegs to malloc heap */ elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz); @@ -326,11 +367,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, /* we can't know in advance how many pages we'll need, so we malloc */ ms = malloc(sizeof(*ms) * n_segs); - - memset(ms, 0, sizeof(*ms) * n_segs); - if (ms == NULL) return -1; + memset(ms, 0, sizeof(*ms) * n_segs); elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align, bound, contig, ms, n_segs); @@ -560,12 +599,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket, /* this will try lower page sizes first */ static void * -heap_alloc_on_socket(const char *type, size_t size, int socket, - unsigned int flags, size_t align, size_t bound, bool contig) +malloc_heap_alloc_on_heap_id(const char *type, size_t size, + unsigned int heap_id, unsigned int flags, size_t align, + size_t bound, bool contig) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY; + int socket_id; void *ret; rte_spinlock_lock(&(heap->lock)); @@ -583,12 +624,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket, * we may still be able to allocate memory from appropriate page sizes, * we just need to request more memory first. */ + + socket_id = rte_socket_id_by_idx(heap_id); + /* + * if socket ID is negative, we cannot find a socket ID for this heap - + * which means it's an external heap.
those can have unexpected page + * sizes, so if the user asked to allocate from there - assume user + * knows what they're doing, and allow allocating from there with any + * page size flags. + */ + if (socket_id < 0) + size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY; + ret = heap_alloc(heap, type, size, size_flags, align, bound, contig); if (ret != NULL) goto alloc_unlock; - if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound, - contig)) { + /* if socket ID is invalid, this is an external heap */ + if (socket_id < 0) + goto alloc_unlock; + + if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align, + bound, contig)) { ret = heap_alloc(heap, type, size, flags, align, bound, contig); /* this should have succeeded */ @@ -604,14 +661,14 @@ void * malloc_heap_alloc(const char *type, size_t size, int socket_arg, unsigned int flags, size_t align, size_t bound, bool contig) { - int socket, i, cur_socket; + int socket, heap_id, i; void *ret; /* return NULL if size is 0 or alignment is not power-of-2 */ if (size == 0 || (align && !rte_is_power_of_2(align))) return NULL; - if (!rte_eal_has_hugepages()) + if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES) socket_arg = SOCKET_ID_ANY; if (socket_arg == SOCKET_ID_ANY) @@ -619,22 +676,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, else socket = socket_arg; - /* Check socket parameter */ - if (socket >= RTE_MAX_NUMA_NODES) + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) return NULL; - ret = heap_alloc_on_socket(type, size, socket, flags, align, bound, - contig); + ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align, + bound, contig); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; - /* try other heaps */ + /* try other heaps. we are only iterating through native DPDK sockets, + * so external heaps won't be included. 
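The rte_eal_check_dma_mask() call in alloc_pages_on_heap() above guards new allocations against devices with limited addressing; a sketch of how a driver might run the same check at probe time (the 40-bit limit, the function name and the log message are illustrative, not from this patch):

#include <rte_log.h>
#include <rte_memory.h>

/* Refuse to start if any already-allocated memseg lies above 2^40. */
static int
check_40bit_addressing(void)
{
        if (rte_eal_check_dma_mask(40) != 0) {
                RTE_LOG(ERR, PMD, "existing memory violates the 40-bit DMA mask\n");
                return -1;
        }
        return 0;
}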
+ */ for (i = 0; i < (int) rte_socket_count(); i++) { - cur_socket = rte_socket_id_by_idx(i); - if (cur_socket == socket) + if (i == heap_id) continue; - ret = heap_alloc_on_socket(type, size, cur_socket, flags, - align, bound, contig); + ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align, + bound, contig); if (ret != NULL) return ret; } @@ -642,11 +702,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, } static void * -heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags, - size_t align, bool contig) +heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id, + unsigned int flags, size_t align, bool contig) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; void *ret; rte_spinlock_lock(&(heap->lock)); @@ -664,7 +724,7 @@ void * malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, size_t align, bool contig) { - int socket, i, cur_socket; + int socket, i, cur_socket, heap_id; void *ret; /* return NULL if align is not power-of-2 */ @@ -679,11 +739,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, else socket = socket_arg; - /* Check socket parameter */ - if (socket >= RTE_MAX_NUMA_NODES) + /* turn socket ID into heap ID */ + heap_id = malloc_socket_to_heap_id(socket); + /* if heap id is negative, socket ID was invalid */ + if (heap_id < 0) return NULL; - ret = heap_alloc_biggest_on_socket(type, socket, flags, align, + ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align, contig); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; @@ -693,8 +755,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, cur_socket = rte_socket_id_by_idx(i); if (cur_socket == socket) continue; - ret = heap_alloc_biggest_on_socket(type, cur_socket, flags, - align, contig); + ret = heap_alloc_biggest_on_heap_id(type, i, flags, align, + contig); if (ret != NULL) return ret; } @@ -756,8 +818,10 @@ malloc_heap_free(struct malloc_elem *elem) /* anything after this is a bonus */ ret = 0; - /* ...of which we can't avail if we are in legacy mode */ - if (internal_config.legacy_mem) + /* ...of which we can't avail if we are in legacy mode, or if this is an + * externally allocated segment. 
+ */ + if (internal_config.legacy_mem || (msl->external > 0)) goto free_unlock; /* check if we can free any memory back to the system */ @@ -914,7 +978,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size) } /* - * Function to retrieve data for heap on given socket + * Function to retrieve data for a given heap */ int malloc_heap_get_stats(struct malloc_heap *heap, @@ -952,7 +1016,7 @@ malloc_heap_get_stats(struct malloc_heap *heap, } /* - * Function to retrieve data for heap on given socket + * Function to retrieve data for a given heap */ void malloc_heap_dump(struct malloc_heap *heap, FILE *f) @@ -973,10 +1037,216 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f) rte_spinlock_unlock(&heap->lock); } +static int +destroy_seg(struct malloc_elem *elem, size_t len) +{ + struct malloc_heap *heap = elem->heap; + struct rte_memseg_list *msl; + + msl = elem->msl; + + /* notify all subscribers that a memory area is going to be removed */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len); + + /* this element can be removed */ + malloc_elem_free_list_remove(elem); + malloc_elem_hide_region(elem, elem, len); + + heap->total_size -= len; + + memset(elem, 0, sizeof(*elem)); + + /* destroy the fbarray backing this memory */ + if (rte_fbarray_destroy(&msl->memseg_arr) < 0) + return -1; + + /* reset the memseg list */ + memset(msl, 0, sizeof(*msl)); + + return 0; +} + +int +malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr, + rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + char fbarray_name[RTE_FBARRAY_NAME_LEN]; + struct rte_memseg_list *msl = NULL; + struct rte_fbarray *arr; + size_t seg_len = n_pages * page_sz; + unsigned int i; + + /* first, find a free memseg list */ + for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { + struct rte_memseg_list *tmp = &mcfg->memsegs[i]; + if (tmp->base_va == NULL) { + msl = tmp; + break; + } + } + if (msl == NULL) { + RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n"); + rte_errno = ENOSPC; + return -1; + } + + snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p", + heap->name, va_addr); + + /* create the backing fbarray */ + if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages, + sizeof(struct rte_memseg)) < 0) { + RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n"); + return -1; + } + arr = &msl->memseg_arr; + + /* fbarray created, fill it up */ + for (i = 0; i < n_pages; i++) { + struct rte_memseg *ms; + + rte_fbarray_set_used(arr, i); + ms = rte_fbarray_get(arr, i); + ms->addr = RTE_PTR_ADD(va_addr, i * page_sz); + ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i]; + ms->hugepage_sz = page_sz; + ms->len = page_sz; + ms->nchannel = rte_memory_get_nchannel(); + ms->nrank = rte_memory_get_nrank(); + ms->socket_id = heap->socket_id; + } + + /* set up the memseg list */ + msl->base_va = va_addr; + msl->page_sz = page_sz; + msl->socket_id = heap->socket_id; + msl->len = seg_len; + msl->version = 0; + msl->external = 1; + + /* erase contents of new memory */ + memset(va_addr, 0, seg_len); + + /* now, add newly minted memory to the malloc heap */ + malloc_heap_add_memory(heap, msl, va_addr, seg_len); + + heap->total_size += seg_len; + + /* all done! 
*/ + RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n", + heap->name, va_addr); + + /* notify all subscribers that a new memory area has been added */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, + va_addr, seg_len); + + return 0; +} + +int +malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, + size_t len) +{ + struct malloc_elem *elem = heap->first; + + /* find element with specified va address */ + while (elem != NULL && elem != va_addr) { + elem = elem->next; + /* stop if we've blown past our VA */ + if (elem > (struct malloc_elem *)va_addr) { + rte_errno = ENOENT; + return -1; + } + } + /* check if element was found */ + if (elem == NULL || elem->msl->len != len) { + rte_errno = ENOENT; + return -1; + } + /* if element's size is not equal to segment len, segment is busy */ + if (elem->state == ELEM_BUSY || elem->size != len) { + rte_errno = EBUSY; + return -1; + } + return destroy_seg(elem, len); +} + +int +malloc_heap_create(struct malloc_heap *heap, const char *heap_name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + uint32_t next_socket_id = mcfg->next_socket_id; + + /* prevent overflow. did you really create 2 billion heaps??? */ + if (next_socket_id > INT32_MAX) { + RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n"); + rte_errno = ENOSPC; + return -1; + } + + /* initialize empty heap */ + heap->alloc_count = 0; + heap->first = NULL; + heap->last = NULL; + LIST_INIT(heap->free_head); + rte_spinlock_init(&heap->lock); + heap->total_size = 0; + heap->socket_id = next_socket_id; + + /* we hold a global mem hotplug writelock, so it's safe to increment */ + mcfg->next_socket_id++; + + /* set up name */ + strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); + return 0; +} + +int +malloc_heap_destroy(struct malloc_heap *heap) +{ + if (heap->alloc_count != 0) { + RTE_LOG(ERR, EAL, "Heap is still in use\n"); + rte_errno = EBUSY; + return -1; + } + if (heap->first != NULL || heap->last != NULL) { + RTE_LOG(ERR, EAL, "Heap still contains memory segments\n"); + rte_errno = EBUSY; + return -1; + } + if (heap->total_size != 0) + RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n"); + + /* after this, the lock will be dropped */ + memset(heap, 0, sizeof(*heap)); + + return 0; +} + int rte_eal_malloc_heap_init(void) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int i; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* assign min socket ID to external heaps */ + mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID; + + /* assign names to default DPDK heaps */ + for (i = 0; i < rte_socket_count(); i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + char heap_name[RTE_HEAP_NAME_MAX_LEN]; + int socket_id = rte_socket_id_by_idx(i); + + snprintf(heap_name, sizeof(heap_name) - 1, + "socket_%i", socket_id); + strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN); + heap->socket_id = socket_id; + } + } + if (register_mp_requests()) { RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n"); diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index f52cb555..e48996d5 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -34,6 +34,20 @@ malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags, size_t align, bool contig); int +malloc_heap_create(struct malloc_heap *heap, const char *heap_name); + +int +malloc_heap_destroy(struct malloc_heap *heap); + +int 
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr, + rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz); + +int +malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr, + size_t len); + +int malloc_heap_free(struct malloc_elem *elem); int @@ -47,6 +61,9 @@ void malloc_heap_dump(struct malloc_heap *heap, FILE *f); int +malloc_socket_to_heap_id(unsigned int socket_id); + +int rte_eal_malloc_heap_init(void); #ifdef __cplusplus diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c index 931c14bc..5f2d4e0b 100644 --- a/lib/librte_eal/common/malloc_mp.c +++ b/lib/librte_eal/common/malloc_mp.c @@ -194,13 +194,11 @@ handle_alloc_request(const struct malloc_mp_req *m, /* we can't know in advance how many pages we'll need, so we malloc */ ms = malloc(sizeof(*ms) * n_segs); - - memset(ms, 0, sizeof(*ms) * n_segs); - if (ms == NULL) { RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n"); goto fail; } + memset(ms, 0, sizeof(*ms) * n_segs); elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket, ar->flags, ar->align, ar->bound, ar->contig, ms, diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build index 56005bea..2a10d57d 100644 --- a/lib/librte_eal/common/meson.build +++ b/lib/librte_eal/common/meson.build @@ -14,6 +14,7 @@ common_sources = files( 'eal_common_errno.c', 'eal_common_fbarray.c', 'eal_common_hexdump.c', + 'eal_common_hypervisor.c', 'eal_common_launch.c', 'eal_common_lcore.c', 'eal_common_log.c', @@ -27,11 +28,13 @@ common_sources = files( 'eal_common_thread.c', 'eal_common_timer.c', 'eal_common_uuid.c', + 'hotplug_mp.c', 'malloc_elem.c', 'malloc_heap.c', 'malloc_mp.c', 'rte_keepalive.c', 'rte_malloc.c', + 'rte_option.c', 'rte_reciprocal.c', 'rte_service.c' ) @@ -59,6 +62,7 @@ common_headers = files( 'include/rte_errno.h', 'include/rte_fbarray.h', 'include/rte_hexdump.h', + 'include/rte_hypervisor.h', 'include/rte_interrupts.h', 'include/rte_keepalive.h', 'include/rte_launch.h', @@ -68,6 +72,7 @@ common_headers = files( 'include/rte_malloc_heap.h', 'include/rte_memory.h', 'include/rte_memzone.h', + 'include/rte_option.h', 'include/rte_pci_dev_feature_defs.h', 'include/rte_pci_dev_features.h', 'include/rte_per_lcore.h', diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index b51a6d11..9e61dc41 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -8,6 +8,7 @@ #include <string.h> #include <sys/queue.h> +#include <rte_errno.h> #include <rte_memcpy.h> #include <rte_memory.h> #include <rte_eal.h> @@ -23,6 +24,7 @@ #include <rte_malloc.h> #include "malloc_elem.h" #include "malloc_heap.h" +#include "eal_memalloc.h" /* Free the memory space back to heap */ @@ -44,13 +46,15 @@ rte_malloc_socket(const char *type, size_t size, unsigned int align, if (size == 0 || (align && !rte_is_power_of_2(align))) return NULL; - if (!rte_eal_has_hugepages()) + /* if there are no hugepages and if we are not allocating from an + * external heap, use memory from any socket available. checking for + * socket being external may return -1 in case of invalid socket, but + * that's OK - if there are no hugepages, it doesn't matter. + */ + if (rte_malloc_heap_socket_is_external(socket_arg) != 1 && + !rte_eal_has_hugepages()) socket_arg = SOCKET_ID_ANY; - /* Check socket parameter */ - if (socket_arg >= RTE_MAX_NUMA_NODES) - return NULL; - return malloc_heap_alloc(type, size, socket_arg, 0, align == 0 ? 
1 : align, 0, false); } @@ -152,11 +156,20 @@ rte_malloc_get_socket_stats(int socket, struct rte_malloc_socket_stats *socket_stats) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int heap_idx, ret = -1; - if (socket >= RTE_MAX_NUMA_NODES || socket < 0) - return -1; + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + + heap_idx = malloc_socket_to_heap_id(socket); + if (heap_idx < 0) + goto unlock; - return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats); + ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], + socket_stats); +unlock: + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; } /* @@ -168,12 +181,75 @@ rte_malloc_dump_heaps(FILE *f) struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; unsigned int idx; - for (idx = 0; idx < rte_socket_count(); idx++) { - unsigned int socket = rte_socket_id_by_idx(idx); - fprintf(f, "Heap on socket %i:\n", socket); - malloc_heap_dump(&mcfg->malloc_heaps[socket], f); + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + fprintf(f, "Heap id: %u\n", idx); + malloc_heap_dump(&mcfg->malloc_heaps[idx], f); + } + + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); +} + +int +rte_malloc_heap_get_socket(const char *name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + unsigned int idx; + int ret; + + if (name == NULL || + strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[idx]; + + if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) { + heap = tmp; + break; + } + } + + if (heap != NULL) { + ret = heap->socket_id; + } else { + rte_errno = ENOENT; + ret = -1; + } + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int +rte_malloc_heap_socket_is_external(int socket_id) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int idx; + int ret = -1; + + if (socket_id == SOCKET_ID_ANY) + return 0; + + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[idx]; + + if ((int)tmp->socket_id == socket_id) { + /* external memory always has large socket ID's */ + ret = tmp->socket_id >= RTE_MAX_NUMA_NODES; + break; + } } + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + return ret; } /* @@ -182,14 +258,20 @@ rte_malloc_dump_heaps(FILE *f) void rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) { - unsigned int socket; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int heap_id; struct rte_malloc_socket_stats sock_stats; + + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + /* Iterate through all initialised heaps */ - for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) { - if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0)) - continue; + for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; + + malloc_heap_get_stats(heap, &sock_stats); - fprintf(f, "Socket:%u\n", socket); + fprintf(f, "Heap id:%u\n", heap_id); + fprintf(f, "\tHeap name:%s\n", heap->name); fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); fprintf(f, "\tFree_size:%zu,\n", 
sock_stats.heap_freesz_bytes); fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); @@ -198,6 +280,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); } + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return; } @@ -223,7 +306,7 @@ rte_malloc_virt2iova(const void *addr) if (elem == NULL) return RTE_BAD_IOVA; - if (rte_eal_iova_mode() == RTE_IOVA_VA) + if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA) return (uintptr_t) addr; ms = rte_mem_virt2memseg(addr, elem->msl); @@ -235,3 +318,320 @@ rte_malloc_virt2iova(const void *addr) return ms->iova + RTE_PTR_DIFF(addr, ms->addr); } + +static struct malloc_heap * +find_named_heap(const char *name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int i; + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *heap = &mcfg->malloc_heaps[i]; + + if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN)) + return heap; + } + return NULL; +} + +int +rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, + rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + unsigned int n; + int ret; + + if (heap_name == NULL || va_addr == NULL || + page_sz == 0 || !rte_is_power_of_2(page_sz) || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + ret = -1; + goto unlock; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + /* cannot add memory to internal heaps */ + rte_errno = EPERM; + ret = -1; + goto unlock; + } + n = len / page_sz; + if (n != n_pages && iova_addrs != NULL) { + rte_errno = EINVAL; + ret = -1; + goto unlock; + } + + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n, + page_sz); + rte_spinlock_unlock(&heap->lock); + +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int +rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + int ret; + + if (heap_name == NULL || va_addr == NULL || len == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + /* cannot remove memory from internal heaps */ + rte_errno = EPERM; + ret = -1; + goto unlock; + } + + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_remove_external_memory(heap, va_addr, len); + rte_spinlock_unlock(&heap->lock); + +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +struct sync_mem_walk_arg { + void *va_addr; + size_t len; + int result; + bool attach; +}; + +static int +sync_mem_walk(const struct rte_memseg_list *msl, void *arg) +{ + struct rte_mem_config *mcfg = 
rte_eal_get_configuration()->mem_config; + struct sync_mem_walk_arg *wa = arg; + size_t len = msl->page_sz * msl->memseg_arr.len; + + if (msl->base_va == wa->va_addr && + len == wa->len) { + struct rte_memseg_list *found_msl; + int msl_idx, ret; + + /* msl is const */ + msl_idx = msl - mcfg->memsegs; + found_msl = &mcfg->memsegs[msl_idx]; + + if (wa->attach) { + ret = rte_fbarray_attach(&found_msl->memseg_arr); + } else { + /* notify all subscribers that a memory area is about to + * be removed + */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + msl->base_va, msl->len); + ret = rte_fbarray_detach(&found_msl->memseg_arr); + } + + if (ret < 0) { + wa->result = -rte_errno; + } else { + /* notify all subscribers that a new memory area was + * added + */ + if (wa->attach) + eal_memalloc_mem_event_notify( + RTE_MEM_EVENT_ALLOC, + msl->base_va, msl->len); + wa->result = 0; + } + return 1; + } + return 0; +} + +static int +sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + struct sync_mem_walk_arg wa; + int ret; + + if (heap_name == NULL || va_addr == NULL || len == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + + /* find our heap */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + /* we shouldn't be able to sync to internal heaps */ + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + rte_errno = EPERM; + ret = -1; + goto unlock; + } + + /* find corresponding memseg list to sync to */ + wa.va_addr = va_addr; + wa.len = len; + wa.result = -ENOENT; /* fail unless explicitly told to succeed */ + wa.attach = attach; + + /* we're already holding a read lock */ + rte_memseg_list_walk_thread_unsafe(sync_mem_walk, &wa); + + if (wa.result < 0) { + rte_errno = -wa.result; + ret = -1; + } else { + /* notify all subscribers that a new memory area was added */ + if (attach) + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, + va_addr, len); + ret = 0; + } +unlock: + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + return ret; +} + +int +rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len) +{ + return sync_memory(heap_name, va_addr, len, true); +} + +int +rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len) +{ + return sync_memory(heap_name, va_addr, len, false); +} + +int +rte_malloc_heap_create(const char *heap_name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + int i, ret; + + if (heap_name == NULL || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + /* check if there is space in the heap list, or if heap with this name + * already exists. 
+ */ + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + for (i = 0; i < RTE_MAX_HEAPS; i++) { + struct malloc_heap *tmp = &mcfg->malloc_heaps[i]; + /* existing heap */ + if (strncmp(heap_name, tmp->name, + RTE_HEAP_NAME_MAX_LEN) == 0) { + RTE_LOG(ERR, EAL, "Heap %s already exists\n", + heap_name); + rte_errno = EEXIST; + ret = -1; + goto unlock; + } + /* empty heap */ + if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) { + heap = tmp; + break; + } + } + if (heap == NULL) { + RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n"); + rte_errno = ENOSPC; + ret = -1; + goto unlock; + } + + /* we're sure that we can create a new heap, so do it */ + ret = malloc_heap_create(heap, heap_name); +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int +rte_malloc_heap_destroy(const char *heap_name) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = NULL; + int ret; + + if (heap_name == NULL || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || + strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == + RTE_HEAP_NAME_MAX_LEN) { + rte_errno = EINVAL; + return -1; + } + rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); + + /* start from non-socket heaps */ + heap = find_named_heap(heap_name); + if (heap == NULL) { + RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name); + rte_errno = ENOENT; + ret = -1; + goto unlock; + } + /* we shouldn't be able to destroy internal heaps */ + if (heap->socket_id < RTE_MAX_NUMA_NODES) { + rte_errno = EPERM; + ret = -1; + goto unlock; + } + /* sanity checks done, now we can destroy the heap */ + rte_spinlock_lock(&heap->lock); + ret = malloc_heap_destroy(heap); + + /* if we failed, lock is still active */ + if (ret < 0) + rte_spinlock_unlock(&heap->lock); +unlock: + rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} diff --git a/lib/librte_eal/common/rte_option.c b/lib/librte_eal/common/rte_option.c new file mode 100644 index 00000000..02d59a86 --- /dev/null +++ b/lib/librte_eal/common/rte_option.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. 
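Taken together, the heap create/destroy and memory add/remove operations implemented in rte_malloc.c above enable the new external-memory workflow; a condensed sketch of the intended call sequence follows (the heap name, the anonymous mapping, the sizes and the trimmed error handling are all assumptions):

#include <sys/mman.h>
#include <rte_malloc.h>

#define EXT_HEAP_NAME "example_heap"      /* hypothetical heap name */
#define EXT_PAGE_SZ   4096UL
#define EXT_HEAP_LEN  (1024 * EXT_PAGE_SZ)

static void *
alloc_from_external_heap(void)
{
        void *base = mmap(NULL, EXT_HEAP_LEN, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        int socket;

        if (base == MAP_FAILED)
                return NULL;

        /* create the named heap, then hand the mapping over to it */
        if (rte_malloc_heap_create(EXT_HEAP_NAME) != 0 ||
            rte_malloc_heap_memory_add(EXT_HEAP_NAME, base, EXT_HEAP_LEN,
                        NULL, EXT_HEAP_LEN / EXT_PAGE_SZ, EXT_PAGE_SZ) != 0)
                return NULL;

        /* the heap is addressed through its synthetic socket ID (>= 256 here) */
        socket = rte_malloc_heap_get_socket(EXT_HEAP_NAME);
        if (socket < 0)
                return NULL;

        return rte_malloc_socket("example", 4096, 0, socket);
}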
+ */ + +#include <unistd.h> +#include <string.h> + +#include <rte_eal.h> +#include <rte_option.h> + +#include "eal_private.h" + +TAILQ_HEAD(rte_option_list, rte_option); + +struct rte_option_list rte_option_list = + TAILQ_HEAD_INITIALIZER(rte_option_list); + +static struct rte_option *option; + +int +rte_option_parse(const char *opt) +{ + /* Check if the option is registered */ + TAILQ_FOREACH(option, &rte_option_list, next) { + if (strcmp(opt, option->opt_str) == 0) { + option->enabled = 1; + return 0; + } + } + + return -1; +} + +void __rte_experimental +rte_option_register(struct rte_option *opt) +{ + TAILQ_FOREACH(option, &rte_option_list, next) { + if (strcmp(opt->opt_str, option->opt_str) == 0) { + RTE_LOG(INFO, EAL, "Option %s has already been registered.\n", + opt->opt_str); + return; + } + } + + TAILQ_INSERT_HEAD(&rte_option_list, opt, next); +} + +void +rte_option_init(void) +{ + TAILQ_FOREACH(option, &rte_option_list, next) { + if (option->enabled) + option->cb(); + } +} diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index fd92c75c..51deb579 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH) EXPORT_MAP := ../../rte_eal_version.map VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) -LIBABIVER := 8 +LIBABIVER := 9 VPATH += $(RTE_SDK)/lib/librte_eal/common @@ -70,10 +70,12 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += hotplug_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_mp.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_option.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c @@ -85,22 +87,6 @@ SRCS-y += rte_cycles.c CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) -CFLAGS_eal.o := -D_GNU_SOURCE -CFLAGS_eal_interrupts.o := -D_GNU_SOURCE -CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE -CFLAGS_eal_timer.o := -D_GNU_SOURCE -CFLAGS_eal_lcore.o := -D_GNU_SOURCE -CFLAGS_eal_memalloc.o := -D_GNU_SOURCE -CFLAGS_eal_thread.o := -D_GNU_SOURCE -CFLAGS_eal_log.o := -D_GNU_SOURCE -CFLAGS_eal_common_log.o := -D_GNU_SOURCE -CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE -CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE -CFLAGS_eal_common_options.o := -D_GNU_SOURCE -CFLAGS_eal_common_thread.o := -D_GNU_SOURCE -CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE -CFLAGS_rte_cycles.o := -D_GNU_SOURCE - # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index e59ac657..361744d4 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -48,6 +48,7 @@ #include <rte_atomic.h> #include <malloc_heap.h> #include <rte_vfio.h> +#include <rte_option.h> #include "eal_private.h" #include "eal_thread.h" @@ -149,7 +150,7 @@ eal_create_runtime_dir(void) } const char * -eal_get_runtime_dir(void) +rte_eal_get_runtime_dir(void) { return runtime_dir; } @@ -263,6 +264,8 @@ rte_eal_config_create(void) * processes could later map the config into this exact
location */ rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; + rte_config.mem_config->dma_maskbits = 0; + } /* attach to an existing shared memory config */ @@ -352,6 +355,24 @@ eal_proc_type_detect(void) return ptype; } +/* copies data from internal config to shared config */ +static void +eal_update_mem_config(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + mcfg->legacy_mem = internal_config.legacy_mem; + mcfg->single_file_segments = internal_config.single_file_segments; +} + +/* copies data from shared config to internal config */ +static void +eal_update_internal_config(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + internal_config.legacy_mem = mcfg->legacy_mem; + internal_config.single_file_segments = mcfg->single_file_segments; +} + /* Sets up rte_config structure with the pointer to shared memory config.*/ static void rte_config_init(void) @@ -361,11 +382,13 @@ rte_config_init(void) switch (rte_config.process_type){ case RTE_PROC_PRIMARY: rte_eal_config_create(); + eal_update_mem_config(); break; case RTE_PROC_SECONDARY: rte_eal_config_attach(); rte_eal_mcfg_wait_complete(rte_config.mem_config); rte_eal_config_reattach(); + eal_update_internal_config(); break; case RTE_PROC_AUTO: case RTE_PROC_INVALID: @@ -580,12 +603,20 @@ eal_parse_args(int argc, char **argv) argvopt = argv; optind = 1; + opterr = 0; while ((opt = getopt_long(argc, argvopt, eal_short_options, eal_long_options, &option_index)) != EOF) { - /* getopt is not happy, stop right now */ + /* + * getopt didn't recognise the option, lets parse the + * registered options to see if the flag is valid + */ if (opt == '?') { + ret = rte_option_parse(argv[optind-1]); + if (ret == 0) + continue; + eal_usage(prgname); ret = -1; goto out; @@ -725,6 +756,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg) { int *socket_id = arg; + if (msl->external) + return 0; + return *socket_id == msl->socket_id; } @@ -793,7 +827,8 @@ rte_eal_init(int argc, char **argv) int i, fctret, ret; pthread_t thread_id; static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); - const char *logid; + const char *p; + static char logid[PATH_MAX]; char cpuset[RTE_CPU_AFFINITY_STR_LEN]; char thread_name[RTE_MAX_THREAD_NAME_LEN]; @@ -810,9 +845,8 @@ rte_eal_init(int argc, char **argv) return -1; } - logid = strrchr(argv[0], '/'); - logid = strdup(logid ? logid + 1: argv[0]); - + p = strrchr(argv[0], '/'); + strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid)); thread_id = pthread_self(); eal_reset_internal_config(&internal_config); @@ -835,7 +869,7 @@ rte_eal_init(int argc, char **argv) } if (eal_plugins_init() < 0) { - rte_eal_init_alert("Cannot init plugins\n"); + rte_eal_init_alert("Cannot init plugins"); rte_errno = EINVAL; rte_atomic32_clear(&run_once); return -1; @@ -850,7 +884,7 @@ rte_eal_init(int argc, char **argv) rte_config_init(); if (rte_eal_intr_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + rte_eal_init_alert("Cannot init interrupt-handling thread"); return -1; } @@ -858,30 +892,43 @@ rte_eal_init(int argc, char **argv) * bus through mp channel in the secondary process before the bus scan. 
*/ if (rte_mp_channel_init() < 0) { - rte_eal_init_alert("failed to init mp channel\n"); + rte_eal_init_alert("failed to init mp channel"); if (rte_eal_process_type() == RTE_PROC_PRIMARY) { rte_errno = EFAULT; return -1; } } + /* register multi-process action callbacks for hotplug */ + if (rte_mp_dev_hotplug_init() < 0) { + rte_eal_init_alert("failed to register mp callback for hotplug"); + return -1; + } + if (rte_bus_scan()) { - rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_eal_init_alert("Cannot scan the buses for devices"); rte_errno = ENODEV; rte_atomic32_clear(&run_once); return -1; } - /* autodetect the iova mapping mode (default is iova_pa) */ - rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); - - /* Workaround for KNI which requires physical address to work */ - if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && - rte_eal_check_module("rte_kni") == 1) { - rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; - RTE_LOG(WARNING, EAL, - "Some devices want IOVA as VA but PA will be used because.. " - "KNI module inserted\n"); + /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { + /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ + rte_eal_get_configuration()->iova_mode = + rte_bus_get_iommu_class(); + + /* Workaround for KNI which requires physical address to work */ + if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { + rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; + RTE_LOG(WARNING, EAL, + "Some devices want IOVA as VA but PA will be used because.. " + "KNI module inserted\n"); + } + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; } if (internal_config.no_hugetlbfs == 0) { @@ -924,7 +971,7 @@ rte_eal_init(int argc, char **argv) #ifdef VFIO_PRESENT if (rte_eal_vfio_setup() < 0) { - rte_eal_init_alert("Cannot init VFIO\n"); + rte_eal_init_alert("Cannot init VFIO"); rte_errno = EAGAIN; rte_atomic32_clear(&run_once); return -1; @@ -935,13 +982,13 @@ rte_eal_init(int argc, char **argv) * initialize memzones first. */ if (rte_eal_memzone_init() < 0) { - rte_eal_init_alert("Cannot init memzone\n"); + rte_eal_init_alert("Cannot init memzone"); rte_errno = ENODEV; return -1; } if (rte_eal_memory_init() < 0) { - rte_eal_init_alert("Cannot init memory\n"); + rte_eal_init_alert("Cannot init memory"); rte_errno = ENOMEM; return -1; } @@ -950,25 +997,25 @@ rte_eal_init(int argc, char **argv) eal_hugedirs_unlock(); if (rte_eal_malloc_heap_init() < 0) { - rte_eal_init_alert("Cannot init malloc heap\n"); + rte_eal_init_alert("Cannot init malloc heap"); rte_errno = ENODEV; return -1; } if (rte_eal_tailqs_init() < 0) { - rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_eal_init_alert("Cannot init tail queues for objects"); rte_errno = EFAULT; return -1; } if (rte_eal_alarm_init() < 0) { - rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + rte_eal_init_alert("Cannot init interrupt-handling thread"); /* rte_eal_alarm_init sets rte_errno on failure. 
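The hunk above makes an explicit --iova-mode EAL option override bus auto-detection; a minimal sketch of an application observing the result after initialization (the command line and messages are illustrative):

#include <stdio.h>
#include <rte_eal.h>
#include <rte_errno.h>

int
main(int argc, char **argv)
{
        /* e.g. started as: ./app -l 0-1 --iova-mode=va */
        if (rte_eal_init(argc, argv) < 0) {
                fprintf(stderr, "EAL init failed: %s\n", rte_strerror(rte_errno));
                return 1;
        }

        printf("IOVA mode in use: %s\n",
                rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA");
        return 0;
}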
*/ return -1; } if (rte_eal_timer_init() < 0) { - rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_eal_init_alert("Cannot init HPET or TSC timers"); rte_errno = ENOTSUP; return -1; } @@ -979,8 +1026,8 @@ rte_eal_init(int argc, char **argv) ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); - RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", - rte_config.master_lcore, (int)thread_id, cpuset, + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", + rte_config.master_lcore, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "..."); RTE_LCORE_FOREACH_SLAVE(i) { @@ -1022,14 +1069,14 @@ rte_eal_init(int argc, char **argv) /* initialize services so vdevs register service during bus_probe. */ ret = rte_service_init(); if (ret) { - rte_eal_init_alert("rte_service_init() failed\n"); + rte_eal_init_alert("rte_service_init() failed"); rte_errno = ENOEXEC; return -1; } /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { - rte_eal_init_alert("Cannot probe devices\n"); + rte_eal_init_alert("Cannot probe devices"); rte_errno = ENOTSUP; return -1; } @@ -1051,6 +1098,9 @@ rte_eal_init(int argc, char **argv) rte_eal_mcfg_complete(); + /* Call each registered callback, if enabled */ + rte_option_init(); + return fctret; } @@ -1059,7 +1109,12 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg __rte_unused) { /* ms is const, so find this memseg */ - struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl); + struct rte_memseg *found; + + if (msl->external) + return 0; + + found = rte_mem_virt2memseg(ms->addr, msl); found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE; diff --git a/lib/librte_eal/linuxapp/eal/eal_dev.c b/lib/librte_eal/linuxapp/eal/eal_dev.c index 1cf6aebf..d589c692 100644 --- a/lib/librte_eal/linuxapp/eal/eal_dev.c +++ b/lib/librte_eal/linuxapp/eal/eal_dev.c @@ -4,6 +4,8 @@ #include <string.h> #include <unistd.h> +#include <fcntl.h> +#include <signal.h> #include <sys/socket.h> #include <linux/netlink.h> @@ -14,15 +16,32 @@ #include <rte_malloc.h> #include <rte_interrupts.h> #include <rte_alarm.h> +#include <rte_bus.h> +#include <rte_eal.h> +#include <rte_spinlock.h> +#include <rte_errno.h> #include "eal_private.h" static struct rte_intr_handle intr_handle = {.fd = -1 }; static bool monitor_started; +static bool hotplug_handle; #define EAL_UEV_MSG_LEN 4096 #define EAL_UEV_MSG_ELEM_LEN 128 +/* + * spinlock for device hot-unplug failure handling. If it try to access bus or + * device, such as handle sigbus on bus or handle memory failure for device + * just need to use this lock. It could protect the bus and the device to avoid + * race condition. + */ +static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER; + +static struct sigaction sigbus_action_old; + +static int sigbus_need_recover; + static void dev_uev_handler(__rte_unused void *param); /* identify the system layer which reports this event. 
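The SIGBUS-based hot-unplug handling that eal_dev.c gains in this patch (see the handlers and the rte_dev_hotplug_handle_enable()/_disable() calls further below) is driven from the application roughly as in this sketch; the callback body, the call ordering and the NULL device name (meaning all devices) are assumptions:

#include <rte_common.h>
#include <rte_dev.h>
#include <rte_log.h>

static void
on_dev_event(const char *device_name, enum rte_dev_event_type type,
                void *arg __rte_unused)
{
        if (type == RTE_DEV_EVENT_REMOVE)
                RTE_LOG(INFO, EAL, "device %s removed\n", device_name);
}

static int
enable_hotplug_handling(void)
{
        /* start uevent monitoring, enable hot-unplug (SIGBUS) handling,
         * then subscribe to events from all devices */
        if (rte_dev_event_monitor_start() != 0)
                return -1;
        if (rte_dev_hotplug_handle_enable() != 0)
                return -1;
        return rte_dev_event_callback_register(NULL, on_dev_event, NULL);
}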
*/ @@ -33,6 +52,55 @@ enum eal_dev_event_subsystem { EAL_DEV_EVENT_SUBSYSTEM_MAX }; +static void +sigbus_action_recover(void) +{ + if (sigbus_need_recover) { + sigaction(SIGBUS, &sigbus_action_old, NULL); + sigbus_need_recover = 0; + } +} + +static void sigbus_handler(int signum, siginfo_t *info, + void *ctx __rte_unused) +{ + int ret; + + RTE_LOG(DEBUG, EAL, "Thread[%d] catch SIGBUS, fault address:%p\n", + (int)pthread_self(), info->si_addr); + + rte_spinlock_lock(&failure_handle_lock); + ret = rte_bus_sigbus_handler(info->si_addr); + rte_spinlock_unlock(&failure_handle_lock); + if (ret == -1) { + rte_exit(EXIT_FAILURE, + "Failed to handle SIGBUS for hot-unplug, " + "(rte_errno: %s)!", strerror(rte_errno)); + } else if (ret == 1) { + if (sigbus_action_old.sa_flags == SA_SIGINFO + && sigbus_action_old.sa_sigaction) { + (*(sigbus_action_old.sa_sigaction))(signum, + info, ctx); + } else if (sigbus_action_old.sa_flags != SA_SIGINFO + && sigbus_action_old.sa_handler) { + (*(sigbus_action_old.sa_handler))(signum); + } else { + rte_exit(EXIT_FAILURE, + "Failed to handle generic SIGBUS!"); + } + } + + RTE_LOG(DEBUG, EAL, "Success to handle SIGBUS for hot-unplug!\n"); +} + +static int cmp_dev_name(const struct rte_device *dev, + const void *_name) +{ + const char *name = _name; + + return strcmp(dev->name, name); +} + static int dev_uev_socket_fd_create(void) { @@ -147,6 +215,9 @@ dev_uev_handler(__rte_unused void *param) struct rte_dev_event uevent; int ret; char buf[EAL_UEV_MSG_LEN]; + struct rte_bus *bus; + struct rte_device *dev; + const char *busname = ""; memset(&uevent, 0, sizeof(struct rte_dev_event)); memset(buf, 0, EAL_UEV_MSG_LEN); @@ -171,8 +242,43 @@ dev_uev_handler(__rte_unused void *param) RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n", uevent.devname, uevent.type, uevent.subsystem); - if (uevent.devname) - dev_callback_process(uevent.devname, uevent.type); + switch (uevent.subsystem) { + case EAL_DEV_EVENT_SUBSYSTEM_PCI: + case EAL_DEV_EVENT_SUBSYSTEM_UIO: + busname = "pci"; + break; + default: + break; + } + + if (uevent.devname) { + if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) { + rte_spinlock_lock(&failure_handle_lock); + bus = rte_bus_find_by_name(busname); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", + busname); + return; + } + + dev = bus->find_device(NULL, cmp_dev_name, + uevent.devname); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "Cannot find device (%s) on " + "bus (%s)\n", uevent.devname, busname); + return; + } + + ret = bus->hot_unplug_handler(dev); + rte_spinlock_unlock(&failure_handle_lock); + if (ret) { + RTE_LOG(ERR, EAL, "Can not handle hot-unplug " + "for device (%s)\n", dev->name); + return; + } + } + rte_dev_event_callback_process(uevent.devname, uevent.type); + } } int __rte_experimental @@ -220,5 +326,67 @@ rte_dev_event_monitor_stop(void) close(intr_handle.fd); intr_handle.fd = -1; monitor_started = false; + return 0; } + +int +dev_sigbus_handler_register(void) +{ + sigset_t mask; + struct sigaction action; + + rte_errno = 0; + + if (sigbus_need_recover) + return 0; + + sigemptyset(&mask); + sigaddset(&mask, SIGBUS); + action.sa_flags = SA_SIGINFO; + action.sa_mask = mask; + action.sa_sigaction = sigbus_handler; + sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old); + + return rte_errno; +} + +int +dev_sigbus_handler_unregister(void) +{ + rte_errno = 0; + + sigbus_action_recover(); + + return rte_errno; +} + +int __rte_experimental +rte_dev_hotplug_handle_enable(void) +{ + int ret = 
0; + + ret = dev_sigbus_handler_register(); + if (ret < 0) + RTE_LOG(ERR, EAL, + "fail to register sigbus handler for devices.\n"); + + hotplug_handle = true; + + return ret; +} + +int __rte_experimental +rte_dev_hotplug_handle_disable(void) +{ + int ret = 0; + + ret = dev_sigbus_handler_unregister(); + if (ret < 0) + RTE_LOG(ERR, EAL, + "fail to unregister sigbus handler for devices.\n"); + + hotplug_handle = false; + + return ret; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 3a7d4b22..0eab1cf7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -6,6 +6,7 @@ #include <sys/types.h> #include <sys/file.h> #include <dirent.h> +#include <fcntl.h> #include <stdint.h> #include <stdlib.h> #include <stdio.h> diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 4076c6d6..39252a88 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -33,6 +33,7 @@ #include <rte_errno.h> #include <rte_spinlock.h> #include <rte_pause.h> +#include <rte_vfio.h> #include "eal_private.h" #include "eal_vfio.h" @@ -308,6 +309,66 @@ vfio_disable_msix(const struct rte_intr_handle *intr_handle) { return ret; } + +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +/* enable req notifier */ +static int +vfio_enable_req(const struct rte_intr_handle *intr_handle) +{ + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_REQ_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +/* disable req notifier */ +static int +vfio_disable_req(const struct rte_intr_handle *intr_handle) +{ + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_REQ_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n", + intr_handle->fd); + + return ret; +} +#endif #endif static int @@ -556,6 +617,12 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle) if (vfio_enable_intx(intr_handle)) return -1; break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + if (vfio_enable_req(intr_handle)) + return -1; + break; +#endif #endif /* not used at this moment */ case RTE_INTR_HANDLE_DEV_EVENT: @@ -606,6 +673,12 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle) if (vfio_disable_intx(intr_handle)) return -1; break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + if (vfio_disable_req(intr_handle)) + return -1; + break; +#endif #endif /* not used at this moment */ case RTE_INTR_HANDLE_DEV_EVENT: @@ -672,6 +745,12 @@ eal_intr_process_interrupts(struct 
epoll_event *events, int nfds) case RTE_INTR_HANDLE_VFIO_LEGACY: bytes_read = sizeof(buf.vfio_intr_count); break; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_INTR_HANDLE_VFIO_REQ: + bytes_read = 0; + call = true; + break; +#endif #endif case RTE_INTR_HANDLE_VDEV: case RTE_INTR_HANDLE_EXT: diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c index aa95551a..48b9c736 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c @@ -34,6 +34,7 @@ #include <rte_log.h> #include <rte_eal_memconfig.h> #include <rte_eal.h> +#include <rte_errno.h> #include <rte_memory.h> #include <rte_spinlock.h> @@ -52,30 +53,55 @@ const int anonymous_hugepages_supported = #endif /* + * we don't actually care if memfd itself is supported - we only need to check + * if memfd supports hugetlbfs, as that already implies memfd support. + * + * also, this is not a constant, because while we may be *compiled* with memfd + * hugetlbfs support, we might not be *running* on a system that supports memfd + * and/or memfd with hugetlbfs, so we need to be able to adjust this flag at + * runtime, and fall back to anonymous memory. + */ +static int memfd_create_supported = +#ifdef MFD_HUGETLB +#define MEMFD_SUPPORTED + 1; +#else + 0; +#endif + +/* * not all kernel version support fallocate on hugetlbfs, so fall back to * ftruncate and disallow deallocation if fallocate is not supported. */ static int fallocate_supported = -1; /* unknown */ -/* for single-file segments, we need some kind of mechanism to keep track of +/* + * we have two modes - single file segments, and file-per-page mode. + * + * for single-file segments, we need some kind of mechanism to keep track of * which hugepages can be freed back to the system, and which cannot. we cannot * use flock() because they don't allow locking parts of a file, and we cannot * use fcntl() due to issues with their semantics, so we will have to rely on a - * bunch of lockfiles for each page. + * bunch of lockfiles for each page. so, we will use 'fds' array to keep track + * of per-page lockfiles. we will store the actual segment list fd in the + * 'memseg_list_fd' field. + * + * for file-per-page mode, each page will have its own fd, so 'memseg_list_fd' + * will be invalid (set to -1), and we'll use 'fds' to keep track of page fd's. * * we cannot know how many pages a system will have in advance, but we do know * that they come in lists, and we know lengths of these lists. so, simply store * a malloc'd array of fd's indexed by list and segment index. * * they will be initialized at startup, and filled as we allocate/deallocate - * segments. also, use this to track memseg list proper fd. + * segments. 
*/ static struct { int *fds; /**< dynamically allocated array of segment lock fd's */ int memseg_list_fd; /**< memseg list fd */ int len; /**< total length of the array */ int count; /**< entries used in an array */ -} lock_fds[RTE_MAX_MEMSEG_LISTS]; +} fd_list[RTE_MAX_MEMSEG_LISTS]; /** local copy of a memory map, used to synchronize memory hotplug in MP */ static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS]; @@ -182,6 +208,31 @@ get_file_size(int fd) return st.st_size; } +static inline uint32_t +bsf64(uint64_t v) +{ + return (uint32_t)__builtin_ctzll(v); +} + +static inline uint32_t +log2_u64(uint64_t v) +{ + if (v == 0) + return 0; + v = rte_align64pow2(v); + return bsf64(v); +} + +static int +pagesz_flags(uint64_t page_sz) +{ + /* as per mmap() manpage, all page sizes are log2 of page size + * shifted by MAP_HUGE_SHIFT + */ + int log2 = log2_u64(page_sz); + return log2 << RTE_MAP_HUGE_SHIFT; +} + /* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */ static int lock(int fd, int type) { @@ -209,12 +260,12 @@ static int get_segment_lock_fd(int list_idx, int seg_idx) char path[PATH_MAX] = {0}; int fd; - if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds)) + if (list_idx < 0 || list_idx >= (int)RTE_DIM(fd_list)) return -1; - if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len) + if (seg_idx < 0 || seg_idx >= fd_list[list_idx].len) return -1; - fd = lock_fds[list_idx].fds[seg_idx]; + fd = fd_list[list_idx].fds[seg_idx]; /* does this lock already exist? */ if (fd >= 0) return fd; @@ -236,8 +287,8 @@ static int get_segment_lock_fd(int list_idx, int seg_idx) return -1; } /* store it for future reference */ - lock_fds[list_idx].fds[seg_idx] = fd; - lock_fds[list_idx].count++; + fd_list[list_idx].fds[seg_idx] = fd; + fd_list[list_idx].count++; return fd; } @@ -245,12 +296,12 @@ static int unlock_segment(int list_idx, int seg_idx) { int fd, ret; - if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds)) + if (list_idx < 0 || list_idx >= (int)RTE_DIM(fd_list)) return -1; - if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len) + if (seg_idx < 0 || seg_idx >= fd_list[list_idx].len) return -1; - fd = lock_fds[list_idx].fds[seg_idx]; + fd = fd_list[list_idx].fds[seg_idx]; /* upgrade lock to exclusive to see if we can remove the lockfile */ ret = lock(fd, LOCK_EX); @@ -270,8 +321,8 @@ static int unlock_segment(int list_idx, int seg_idx) * and remove it from list anyway. 
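The lockfile scheme used here effectively turns flock() into a cross-process reference count: every process using a page keeps a shared lock on that page's lockfile, and whoever succeeds in taking a non-blocking exclusive lock knows it is the last user and may unlink the file. A stripped-down sketch of that idiom (a hypothetical helper, not the EAL code, but following the same 1/0/-1 convention as the lock() wrapper above):

#include <sys/file.h>
#include <errno.h>

/*
 *  1 - exclusive lock taken, nobody else holds the file
 *  0 - some other process still holds a (shared) lock
 * -1 - real error
 */
static int
try_claim_last_user(int fd)
{
	if (flock(fd, LOCK_EX | LOCK_NB) == 0)
		return 1;
	if (errno == EWOULDBLOCK)
		return 0;
	return -1;
}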
*/ close(fd); - lock_fds[list_idx].fds[seg_idx] = -1; - lock_fds[list_idx].count--; + fd_list[list_idx].fds[seg_idx] = -1; + fd_list[list_idx].count--; if (ret < 0) return -1; @@ -279,16 +330,68 @@ static int unlock_segment(int list_idx, int seg_idx) } static int +get_seg_memfd(struct hugepage_info *hi __rte_unused, + unsigned int list_idx __rte_unused, + unsigned int seg_idx __rte_unused) +{ +#ifdef MEMFD_SUPPORTED + int fd; + char segname[250]; /* as per manpage, limit is 249 bytes plus null */ + + if (internal_config.single_file_segments) { + fd = fd_list[list_idx].memseg_list_fd; + + if (fd < 0) { + int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); + + snprintf(segname, sizeof(segname), "seg_%i", list_idx); + fd = memfd_create(segname, flags); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n", + __func__, strerror(errno)); + return -1; + } + fd_list[list_idx].memseg_list_fd = fd; + } + } else { + fd = fd_list[list_idx].fds[seg_idx]; + + if (fd < 0) { + int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); + + snprintf(segname, sizeof(segname), "seg_%i-%i", + list_idx, seg_idx); + fd = memfd_create(segname, flags); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n", + __func__, strerror(errno)); + return -1; + } + fd_list[list_idx].fds[seg_idx] = fd; + } + } + return fd; +#endif + return -1; +} + +static int get_seg_fd(char *path, int buflen, struct hugepage_info *hi, unsigned int list_idx, unsigned int seg_idx) { int fd; + /* for in-memory mode, we only make it here when we're sure we support + * memfd, and this is a special case. + */ + if (internal_config.in_memory) + return get_seg_memfd(hi, list_idx, seg_idx); + if (internal_config.single_file_segments) { /* create a hugepage file path */ eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx); - fd = lock_fds[list_idx].memseg_list_fd; + fd = fd_list[list_idx].memseg_list_fd; if (fd < 0) { fd = open(path, O_CREAT | O_RDWR, 0600); @@ -304,24 +407,30 @@ get_seg_fd(char *path, int buflen, struct hugepage_info *hi, close(fd); return -1; } - lock_fds[list_idx].memseg_list_fd = fd; + fd_list[list_idx].memseg_list_fd = fd; } } else { /* create a hugepage file path */ eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx); - fd = open(path, O_CREAT | O_RDWR, 0600); + + fd = fd_list[list_idx].fds[seg_idx]; + if (fd < 0) { - RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__, - strerror(errno)); - return -1; - } - /* take out a read lock */ - if (lock(fd, LOCK_SH) < 0) { - RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n", - __func__, strerror(errno)); - close(fd); - return -1; + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", + __func__, strerror(errno)); + return -1; + } + /* take out a read lock */ + if (lock(fd, LOCK_SH) < 0) { + RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n", + __func__, strerror(errno)); + close(fd); + return -1; + } + fd_list[list_idx].fds[seg_idx] = fd; } } return fd; @@ -332,6 +441,33 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx, uint64_t fa_offset, uint64_t page_sz, bool grow) { bool again = false; + + /* in-memory mode is a special case, because we don't need to perform + * any locking, and we can be sure that fallocate() is supported. + */ + if (internal_config.in_memory) { + int flags = grow ? 
0 : FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_KEEP_SIZE; + int ret; + + /* grow or shrink the file */ + ret = fallocate(fd, flags, fa_offset, page_sz); + + if (ret < 0) { + RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n", + __func__, + strerror(errno)); + return -1; + } + /* increase/decrease total segment count */ + fd_list[list_idx].count += (grow ? 1 : -1); + if (!grow && fd_list[list_idx].count == 0) { + close(fd_list[list_idx].memseg_list_fd); + fd_list[list_idx].memseg_list_fd = -1; + } + return 0; + } + do { if (fallocate_supported == 0) { /* we cannot deallocate memory if fallocate() is not @@ -410,9 +546,9 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx, * page file fd, so that one of the processes * could then delete the file after shrinking. */ - if (ret < 1 && lock_fds[list_idx].count == 0) { + if (ret < 1 && fd_list[list_idx].count == 0) { close(fd); - lock_fds[list_idx].memseg_list_fd = -1; + fd_list[list_idx].memseg_list_fd = -1; } if (ret < 0) { @@ -448,13 +584,13 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx, * more segments active in this segment list, * and remove the file if there aren't. */ - if (lock_fds[list_idx].count == 0) { + if (fd_list[list_idx].count == 0) { if (unlink(path)) RTE_LOG(ERR, EAL, "%s(): unlinking '%s' failed: %s\n", __func__, path, strerror(errno)); close(fd); - lock_fds[list_idx].memseg_list_fd = -1; + fd_list[list_idx].memseg_list_fd = -1; } } } @@ -481,26 +617,34 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, void *new_addr; alloc_sz = hi->hugepage_sz; - if (!internal_config.single_file_segments && - internal_config.in_memory && - anonymous_hugepages_supported) { - int log2, flags; - - log2 = rte_log2_u32(alloc_sz); - /* as per mmap() manpage, all page sizes are log2 of page size - * shifted by MAP_HUGE_SHIFT - */ - flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED | + + /* these are checked at init, but code analyzers don't know that */ + if (internal_config.in_memory && !anonymous_hugepages_supported) { + RTE_LOG(ERR, EAL, "Anonymous hugepages not supported, in-memory mode cannot allocate memory\n"); + return -1; + } + if (internal_config.in_memory && !memfd_create_supported && + internal_config.single_file_segments) { + RTE_LOG(ERR, EAL, "Single-file segments are not supported without memfd support\n"); + return -1; + } + + /* in-memory without memfd is a special case */ + int mmap_flags; + + if (internal_config.in_memory && !memfd_create_supported) { + int pagesz_flag, flags; + + pagesz_flag = pagesz_flags(alloc_sz); + flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS; fd = -1; - va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0); - - /* single-file segments codepath will never be active because - * in-memory mode is incompatible with it and it's stopped at - * EAL initialization stage, however the compiler doesn't know - * that and complains about map_offset being used uninitialized - * on failure codepaths while having in-memory mode enabled. so, - * assign a value here. + mmap_flags = flags; + + /* single-file segments codepath will never be active + * here because in-memory mode is incompatible with the + * fallback path, and it's stopped at EAL initialization + * stage. 
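In the memfd-backed in-memory path above, one file backs a whole segment list, so growing means reserving another page worth of blocks and shrinking means punching a hole so the kernel can reclaim the hugepage while the file keeps its size. A minimal Linux-only illustration of the two fallocate() calls involved (FALLOC_FL_* come from <fcntl.h> with _GNU_SOURCE; hole punching on hugetlbfs needs a reasonably recent kernel):

#define _GNU_SOURCE
#include <fcntl.h>

/* reserve one page worth of backing store at 'offset' */
static int
grow_one_page(int fd, off_t offset, off_t page_sz)
{
	return fallocate(fd, 0, offset, page_sz);
}

/* hand the page back to the kernel without changing the file size */
static int
shrink_one_page(int fd, off_t offset, off_t page_sz)
{
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			offset, page_sz);
}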
*/ map_offset = 0; } else { @@ -524,7 +668,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, __func__, strerror(errno)); goto resized; } - if (internal_config.hugepage_unlink) { + if (internal_config.hugepage_unlink && + !internal_config.in_memory) { if (unlink(path)) { RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n", __func__, strerror(errno)); @@ -532,16 +677,16 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, } } } - - /* - * map the segment, and populate page tables, the kernel fills - * this segment with zeros if it's a new page. - */ - va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, - map_offset); + mmap_flags = MAP_SHARED | MAP_POPULATE | MAP_FIXED; } + /* + * map the segment, and populate page tables, the kernel fills + * this segment with zeros if it's a new page. + */ + va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, mmap_flags, fd, + map_offset); + if (va == MAP_FAILED) { RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__, strerror(errno)); @@ -593,10 +738,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, goto mapped; } #endif - /* for non-single file segments that aren't in-memory, we can close fd - * here */ - if (!internal_config.single_file_segments && !internal_config.in_memory) - close(fd); ms->addr = addr; ms->hugepage_sz = alloc_sz; @@ -626,7 +767,10 @@ unmapped: RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n"); } resized: - /* in-memory mode will never be single-file-segments mode */ + /* some codepaths will return negative fd, so exit early */ + if (fd < 0) + return -1; + if (internal_config.single_file_segments) { resize_hugefile(fd, path, list_idx, seg_idx, map_offset, alloc_sz, false); @@ -638,6 +782,7 @@ resized: lock(fd, LOCK_EX) == 1) unlink(path); close(fd); + fd_list[list_idx].fds[seg_idx] = -1; } return -1; } @@ -648,7 +793,8 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, { uint64_t map_offset; char path[PATH_MAX]; - int fd, ret; + int fd, ret = 0; + bool exit_early; /* erase page data */ memset(ms->addr, 0, ms->len); @@ -660,8 +806,17 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, return -1; } + exit_early = false; + + /* if we're using anonymous hugepages, nothing to be done */ + if (internal_config.in_memory && !memfd_create_supported) + exit_early = true; + /* if we've already unlinked the page, nothing needs to be done */ - if (internal_config.hugepage_unlink) { + if (!internal_config.in_memory && internal_config.hugepage_unlink) + exit_early = true; + + if (exit_early) { memset(ms, 0, sizeof(*ms)); return 0; } @@ -684,14 +839,17 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, /* if we're able to take out a write lock, we're the last one * holding onto this page. 
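When memfd is not available, the fallback above maps anonymous hugepages directly and encodes the desired page size into the mmap() flags, which is exactly what the pagesz_flags() helper computes: log2 of the page size shifted by MAP_HUGE_SHIFT (26 on Linux; the patch uses DPDK's own RTE_MAP_HUGE_SHIFT name for the same shift). A self-contained sketch for a single 2 MB page:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif

int main(void)
{
	size_t sz = 2UL << 20;                  /* one 2 MB hugepage */
	int pagesz_flag = 21 << MAP_HUGE_SHIFT; /* log2(2 MB) == 21 */
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | pagesz_flag;
	void *va = mmap(NULL, sz, PROT_READ | PROT_WRITE, flags, -1, 0);

	if (va == MAP_FAILED) {
		perror("mmap");   /* e.g. no free hugepages reserved */
		return 1;
	}
	printf("anonymous hugepage mapped at %p\n", va);
	munmap(va, sz);
	return 0;
}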
*/ - ret = lock(fd, LOCK_EX); - if (ret >= 0) { - /* no one else is using this page */ - if (ret == 1) - unlink(path); + if (!internal_config.in_memory) { + ret = lock(fd, LOCK_EX); + if (ret >= 0) { + /* no one else is using this page */ + if (ret == 1) + unlink(path); + } } /* closing fd will drop the lock */ close(fd); + fd_list[list_idx].fds[seg_idx] = -1; } memset(ms, 0, sizeof(*ms)); @@ -828,7 +986,7 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg) int msl_idx, seg_idx, ret, dir_fd = -1; start_addr = (uintptr_t) msl->base_va; - end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz; + end_addr = start_addr + msl->len; if ((uintptr_t)wa->ms->addr < start_addr || (uintptr_t)wa->ms->addr >= end_addr) @@ -1250,6 +1408,9 @@ sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused) unsigned int i; int msl_idx; + if (msl->external) + return 0; + msl_idx = msl - mcfg->memsegs; primary_msl = &mcfg->memsegs[msl_idx]; local_msl = &local_memsegs[msl_idx]; @@ -1298,6 +1459,9 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl, char name[PATH_MAX]; int msl_idx, ret; + if (msl->external) + return 0; + msl_idx = msl - mcfg->memsegs; primary_msl = &mcfg->memsegs[msl_idx]; local_msl = &local_memsegs[msl_idx]; @@ -1314,50 +1478,176 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl, return -1; } local_msl->base_va = primary_msl->base_va; + local_msl->len = primary_msl->len; return 0; } static int -secondary_lock_list_create_walk(const struct rte_memseg_list *msl, - void *arg __rte_unused) +alloc_list(int list_idx, int len) { - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - unsigned int i, len; - int msl_idx; int *data; + int i; - msl_idx = msl - mcfg->memsegs; - len = msl->memseg_arr.len; - - /* ensure we have space to store lock fd per each possible segment */ + /* ensure we have space to store fd per each possible segment */ data = malloc(sizeof(int) * len); if (data == NULL) { - RTE_LOG(ERR, EAL, "Unable to allocate space for lock descriptors\n"); + RTE_LOG(ERR, EAL, "Unable to allocate space for file descriptors\n"); return -1; } /* set all fd's as invalid */ for (i = 0; i < len; i++) data[i] = -1; - lock_fds[msl_idx].fds = data; - lock_fds[msl_idx].len = len; - lock_fds[msl_idx].count = 0; - lock_fds[msl_idx].memseg_list_fd = -1; + fd_list[list_idx].fds = data; + fd_list[list_idx].len = len; + fd_list[list_idx].count = 0; + fd_list[list_idx].memseg_list_fd = -1; + + return 0; +} + +static int +fd_list_create_walk(const struct rte_memseg_list *msl, + void *arg __rte_unused) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned int len; + int msl_idx; + + if (msl->external) + return 0; + + msl_idx = msl - mcfg->memsegs; + len = msl->memseg_arr.len; + + return alloc_list(msl_idx, len); +} + +int +eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* if list is not allocated, allocate it */ + if (fd_list[list_idx].len == 0) { + int len = mcfg->memsegs[list_idx].memseg_arr.len; + + if (alloc_list(list_idx, len) < 0) + return -ENOMEM; + } + fd_list[list_idx].fds[seg_idx] = fd; return 0; } int +eal_memalloc_get_seg_fd(int list_idx, int seg_idx) +{ + int fd; + if (internal_config.single_file_segments) { + fd = fd_list[list_idx].memseg_list_fd; + } else if (fd_list[list_idx].len == 0) { + /* list not initialized */ + fd = -1; + } else { + fd = fd_list[list_idx].fds[seg_idx]; + } + if (fd < 0) + return 
-ENODEV; + return fd; +} + +static int +test_memfd_create(void) +{ +#ifdef MEMFD_SUPPORTED + unsigned int i; + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + uint64_t pagesz = internal_config.hugepage_info[i].hugepage_sz; + int pagesz_flag = pagesz_flags(pagesz); + int flags; + + flags = pagesz_flag | MFD_HUGETLB; + int fd = memfd_create("test", flags); + if (fd < 0) { + /* we failed - let memalloc know this isn't working */ + if (errno == EINVAL) { + memfd_create_supported = 0; + return 0; /* not supported */ + } + + /* we got other error - something's wrong */ + return -1; /* error */ + } + close(fd); + return 1; /* supported */ + } +#endif + return 0; /* not supported */ +} + +int +eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* fd_list not initialized? */ + if (fd_list[list_idx].len == 0) + return -ENODEV; + if (internal_config.single_file_segments) { + size_t pgsz = mcfg->memsegs[list_idx].page_sz; + + /* segment not active? */ + if (fd_list[list_idx].memseg_list_fd < 0) + return -ENOENT; + *offset = pgsz * seg_idx; + } else { + /* segment not active? */ + if (fd_list[list_idx].fds[seg_idx] < 0) + return -ENOENT; + *offset = 0; + } + return 0; +} + +int eal_memalloc_init(void) { if (rte_eal_process_type() == RTE_PROC_SECONDARY) if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0) return -1; + if (rte_eal_process_type() == RTE_PROC_PRIMARY && + internal_config.in_memory) { + int mfd_res = test_memfd_create(); - /* initialize all of the lock fd lists */ - if (internal_config.single_file_segments) - if (rte_memseg_list_walk(secondary_lock_list_create_walk, NULL)) + if (mfd_res < 0) { + RTE_LOG(ERR, EAL, "Unable to check if memfd is supported\n"); + return -1; + } + if (mfd_res == 1) + RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n"); + else + RTE_LOG(INFO, EAL, "Using memfd is not supported, falling back to anonymous hugepages\n"); + + /* we only support single-file segments mode with in-memory mode + * if we support hugetlbfs with memfd_create. this code will + * test if we do. 
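Keeping these per-segment fds around (instead of closing them right after mmap, as the removed code did) is what allows an fd plus an offset to be handed to another process, which can then map the same hugepage memory without opening hugetlbfs itself. A hedged sketch of such a consumer, assuming fd, offset and len have already arrived over some IPC channel (the function name is illustrative only):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

/* map somebody else's hugepage segment from its backing fd */
static void *
map_shared_segment(int fd, size_t offset, size_t len)
{
	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, (off_t)offset);

	if (va == MAP_FAILED) {
		perror("mmap");
		return NULL;
	}
	return va;
}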
+ */ + if (internal_config.single_file_segments && + mfd_res != 1) { + RTE_LOG(ERR, EAL, "Single-file segments mode cannot be used without memfd support\n"); return -1; + } + /* this cannot ever happen but better safe than sorry */ + if (!anonymous_hugepages_supported) { + RTE_LOG(ERR, EAL, "Using anonymous memory is not supported\n"); + return -1; + } + } + + /* initialize all of the fd lists */ + if (rte_memseg_list_walk(fd_list_create_walk, NULL)) + return -1; return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index dbf19499..fce86fda 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -5,6 +5,7 @@ #define _FILE_OFFSET_BITS 64 #include <errno.h> +#include <fcntl.h> #include <stdarg.h> #include <stdbool.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include <sys/stat.h> #include <sys/queue.h> #include <sys/file.h> +#include <sys/resource.h> #include <unistd.h> #include <limits.h> #include <sys/ioctl.h> @@ -263,7 +265,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, int node_id = -1; int essential_prev = 0; int oldpolicy; - struct bitmask *oldmask = numa_allocate_nodemask(); + struct bitmask *oldmask = NULL; bool have_numa = true; unsigned long maxnode = 0; @@ -275,6 +277,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, if (have_numa) { RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n"); + oldmask = numa_allocate_nodemask(); if (get_mempolicy(&oldpolicy, oldmask->maskp, oldmask->size + 1, 0, 0) < 0) { RTE_LOG(ERR, EAL, @@ -402,7 +405,8 @@ out: numa_set_localalloc(); } } - numa_free_cpumask(oldmask); + if (oldmask != NULL) + numa_free_cpumask(oldmask); #endif return i; } @@ -584,7 +588,7 @@ unlink_hugepage_files(struct hugepage_file *hugepg_tbl, for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; - if (hp->final_va != NULL && unlink(hp->filepath)) { + if (hp->orig_va != NULL && unlink(hp->filepath)) { RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", __func__, hp->filepath, strerror(errno)); } @@ -771,7 +775,10 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end) rte_fbarray_set_used(arr, ms_idx); - close(fd); + /* store segment fd internally */ + if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0) + RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n", + rte_strerror(rte_errno)); } RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n", (seg_len * page_sz) >> 20, socket_id); @@ -857,6 +864,7 @@ alloc_va_space(struct rte_memseg_list *msl) return -1; } msl->base_va = addr; + msl->len = mem_sz; return 0; } @@ -1365,6 +1373,7 @@ eal_legacy_hugepage_init(void) msl->base_va = addr; msl->page_sz = page_sz; msl->socket_id = 0; + msl->len = internal_config.memory; /* populate memsegs. 
each memseg is one page long */ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { @@ -1611,7 +1620,7 @@ eal_legacy_hugepage_init(void) if (msl->memseg_arr.count > 0) continue; /* this is an unused list, deallocate it */ - mem_sz = (size_t)msl->page_sz * msl->memseg_arr.len; + mem_sz = msl->len; munmap(msl->base_va, mem_sz); msl->base_va = NULL; @@ -1770,6 +1779,7 @@ getFileSize(int fd) static int eal_legacy_hugepage_attach(void) { + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; struct hugepage_file *hp = NULL; unsigned int num_hp = 0; unsigned int i = 0; @@ -1813,6 +1823,9 @@ eal_legacy_hugepage_attach(void) struct hugepage_file *hf = &hp[i]; size_t map_sz = hf->size; void *map_addr = hf->final_va; + int msl_idx, ms_idx; + struct rte_memseg_list *msl; + struct rte_memseg *ms; /* if size is zero, no more pages left */ if (map_sz == 0) @@ -1830,25 +1843,50 @@ eal_legacy_hugepage_attach(void) if (map_addr == MAP_FAILED) { RTE_LOG(ERR, EAL, "Could not map %s: %s\n", hf->filepath, strerror(errno)); - close(fd); - goto error; + goto fd_error; } /* set shared lock on the file. */ if (flock(fd, LOCK_SH) < 0) { RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n", __func__, strerror(errno)); - close(fd); - goto error; + goto fd_error; } - close(fd); + /* find segment data */ + msl = rte_mem_virt2memseg_list(map_addr); + if (msl == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n", + __func__); + goto fd_error; + } + ms = rte_mem_virt2memseg(map_addr, msl); + if (ms == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n", + __func__); + goto fd_error; + } + + msl_idx = msl - mcfg->memsegs; + ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); + if (ms_idx < 0) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n", + __func__); + goto fd_error; + } + + /* store segment fd internally */ + if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0) + RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n", + rte_strerror(rte_errno)); } /* unmap the hugepage config file, since we are done using it */ munmap(hp, size); close(fd_hugepage); return 0; +fd_error: + close(fd); error: /* map all segments into memory to make sure we get the addrs */ cur_seg = 0; @@ -2093,18 +2131,65 @@ static int __rte_unused memseg_primary_init(void) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int i, socket_id, hpi_idx, msl_idx = 0; + struct memtype { + uint64_t page_sz; + int socket_id; + } *memtypes = NULL; + int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */ struct rte_memseg_list *msl; - uint64_t max_mem, total_mem; + uint64_t max_mem, max_mem_per_type; + unsigned int max_seglists_per_type; + unsigned int n_memtypes, cur_type; /* no-huge does not need this at all */ if (internal_config.no_hugetlbfs) return 0; - max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; - total_mem = 0; + /* + * figuring out amount of memory we're going to have is a long and very + * involved process. the basic element we're operating with is a memory + * type, defined as a combination of NUMA node ID and page size (so that + * e.g. 2 sockets with 2 page sizes yield 4 memory types in total). + * + * deciding amount of memory going towards each memory type is a + * balancing act between maximum segments per type, maximum memory per + * type, and number of detected NUMA nodes. the goal is to make sure + * each memory type gets at least one memseg list. + * + * the total amount of memory is limited by RTE_MAX_MEM_MB value. 
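As a rough worked example of the sizing rules spelled out in this comment (the numbers below are placeholders chosen for illustration, not the build-time defaults): two sockets with two hugepage sizes give four memory types; each type is capped at the smaller of the per-type limit and the global limit divided by four, and gets an equal share of the available memseg lists. The same arithmetic as a tiny sketch:

#include <inttypes.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* hypothetical caps, not the DPDK defaults */
	uint64_t max_mem = 512ULL << 30;          /* global memory cap   */
	uint64_t max_mem_per_type = 64ULL << 30;  /* per-type memory cap */
	unsigned int max_lists = 64;              /* total memseg lists  */
	unsigned int sockets = 2, pagesizes = 2;

	unsigned int n_memtypes = sockets * pagesizes;
	uint64_t per_type = MIN(max_mem_per_type, max_mem / n_memtypes);
	unsigned int lists_per_type = max_lists / n_memtypes;

	printf("%u memory types, %" PRIu64 " bytes and %u lists each\n",
			n_memtypes, per_type, lists_per_type);
	return 0;
}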
+ * + * the total amount of memory per type is limited by either + * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number + * of detected NUMA nodes. additionally, maximum number of segments per + * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for + * smaller page sizes, it can take hundreds of thousands of segments to + * reach the above specified per-type memory limits. + * + * additionally, each type may have multiple memseg lists associated + * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger + * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones. + * + * the number of memseg lists per type is decided based on the above + * limits, and also taking number of detected NUMA nodes, to make sure + * that we don't run out of memseg lists before we populate all NUMA + * nodes with memory. + * + * we do this in three stages. first, we collect the number of types. + * then, we figure out memory constraints and populate the list of + * would-be memseg lists. then, we go ahead and allocate the memseg + * lists. + */ - /* create memseg lists */ + /* create space for mem types */ + n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count(); + memtypes = calloc(n_memtypes, sizeof(*memtypes)); + if (memtypes == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n"); + return -1; + } + + /* populate mem types */ + cur_type = 0; for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; hpi_idx++) { struct hugepage_info *hpi; @@ -2113,62 +2198,114 @@ memseg_primary_init(void) hpi = &internal_config.hugepage_info[hpi_idx]; hugepage_sz = hpi->hugepage_sz; - for (i = 0; i < (int) rte_socket_count(); i++) { - uint64_t max_type_mem, total_type_mem = 0; - int type_msl_idx, max_segs, total_segs = 0; - - socket_id = rte_socket_id_by_idx(i); + for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) { + int socket_id = rte_socket_id_by_idx(i); #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES if (socket_id > 0) break; #endif + memtypes[cur_type].page_sz = hugepage_sz; + memtypes[cur_type].socket_id = socket_id; - if (total_mem >= max_mem) - break; - - max_type_mem = RTE_MIN(max_mem - total_mem, - (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); - max_segs = RTE_MAX_MEMSEG_PER_TYPE; + RTE_LOG(DEBUG, EAL, "Detected memory type: " + "socket_id:%u hugepage_sz:%" PRIu64 "\n", + socket_id, hugepage_sz); + } + } - type_msl_idx = 0; - while (total_type_mem < max_type_mem && - total_segs < max_segs) { - uint64_t cur_max_mem, cur_mem; - unsigned int n_segs; + /* set up limits for types */ + max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; + max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20, + max_mem / n_memtypes); + /* + * limit maximum number of segment lists per type to ensure there's + * space for memseg lists for all NUMA nodes with all page sizes + */ + max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes; - if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, - "No more space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); - return -1; - } + if (max_seglists_per_type == 0) { + RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + goto out; + } - msl = &mcfg->memsegs[msl_idx++]; + /* go through all mem types and create segment lists */ + msl_idx = 0; + for (cur_type = 0; cur_type < n_memtypes; cur_type++) { + unsigned int cur_seglist, n_seglists, n_segs; + unsigned int max_segs_per_type, max_segs_per_list; + 
struct memtype *type = &memtypes[cur_type]; + uint64_t max_mem_per_list, pagesz; + int socket_id; - cur_max_mem = max_type_mem - total_type_mem; + pagesz = type->page_sz; + socket_id = type->socket_id; - cur_mem = get_mem_amount(hugepage_sz, - cur_max_mem); - n_segs = cur_mem / hugepage_sz; + /* + * we need to create segment lists for this type. we must take + * into account the following things: + * + * 1. total amount of memory we can use for this memory type + * 2. total amount of memory per memseg list allowed + * 3. number of segments needed to fit the amount of memory + * 4. number of segments allowed per type + * 5. number of segments allowed per memseg list + * 6. number of memseg lists we are allowed to take up + */ - if (alloc_memseg_list(msl, hugepage_sz, n_segs, - socket_id, type_msl_idx)) - return -1; + /* calculate how much segments we will need in total */ + max_segs_per_type = max_mem_per_type / pagesz; + /* limit number of segments to maximum allowed per type */ + max_segs_per_type = RTE_MIN(max_segs_per_type, + (unsigned int)RTE_MAX_MEMSEG_PER_TYPE); + /* limit number of segments to maximum allowed per list */ + max_segs_per_list = RTE_MIN(max_segs_per_type, + (unsigned int)RTE_MAX_MEMSEG_PER_LIST); + + /* calculate how much memory we can have per segment list */ + max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz, + (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20); + + /* calculate how many segments each segment list will have */ + n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz); + + /* calculate how many segment lists we can have */ + n_seglists = RTE_MIN(max_segs_per_type / n_segs, + max_mem_per_type / max_mem_per_list); + + /* limit number of segment lists according to our maximum */ + n_seglists = RTE_MIN(n_seglists, max_seglists_per_type); + + RTE_LOG(DEBUG, EAL, "Creating %i segment lists: " + "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n", + n_seglists, n_segs, socket_id, pagesz); + + /* create all segment lists */ + for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) { + if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { + RTE_LOG(ERR, EAL, + "No more space in memseg lists, please increase %s\n", + RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + goto out; + } + msl = &mcfg->memsegs[msl_idx++]; - total_segs += msl->memseg_arr.len; - total_type_mem = total_segs * hugepage_sz; - type_msl_idx++; + if (alloc_memseg_list(msl, pagesz, n_segs, + socket_id, cur_seglist)) + goto out; - if (alloc_va_space(msl)) { - RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); - return -1; - } + if (alloc_va_space(msl)) { + RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); + goto out; } - total_mem += total_type_mem; } } - return 0; + /* we're successful */ + ret = 0; +out: + free(memtypes); + return ret; } static int @@ -2204,6 +2341,25 @@ memseg_secondary_init(void) int rte_eal_memseg_init(void) { + /* increase rlimit to maximum */ + struct rlimit lim; + + if (getrlimit(RLIMIT_NOFILE, &lim) == 0) { + /* set limit to maximum */ + lim.rlim_cur = lim.rlim_max; + + if (setrlimit(RLIMIT_NOFILE, &lim) < 0) { + RTE_LOG(DEBUG, EAL, "Setting maximum number of open files failed: %s\n", + strerror(errno)); + } else { + RTE_LOG(DEBUG, EAL, "Setting maximum number of open files to %" + PRIu64 "\n", + (uint64_t)lim.rlim_cur); + } + } else { + RTE_LOG(ERR, EAL, "Cannot get current resource limits\n"); + } + return rte_eal_process_type() == RTE_PROC_PRIMARY ? 
#ifndef RTE_ARCH_64 memseg_primary_init_32() : diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c index b496fc71..379773b6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_thread.c +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -121,8 +121,8 @@ eal_thread_loop(__attribute__((unused)) void *arg) ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset)); - RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n", - lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "..."); + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", + lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "..."); /* read on our pipe to get commands */ while (1) { diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c index 2766bd78..bc8f0519 100644 --- a/lib/librte_eal/linuxapp/eal/eal_timer.c +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -87,7 +87,7 @@ static pthread_t msb_inc_thread_id; * containing used to process MSB of the HPET (unfortunately, we need * this because hpet is 32 bits by default under linux). */ -static void +static void * hpet_msb_inc(__attribute__((unused)) void *arg) { uint32_t t; @@ -98,6 +98,7 @@ hpet_msb_inc(__attribute__((unused)) void *arg) eal_hpet_msb ++; sleep(10); } + return NULL; } uint64_t @@ -178,7 +179,7 @@ rte_eal_hpet_init(int make_default) /* create a thread that will increment a global variable for * msb (hpet is 32 bits by default under linux) */ ret = rte_ctrl_thread_create(&msb_inc_thread_id, "hpet-msb-inc", NULL, - (void *(*)(void *))hpet_msb_inc, NULL); + hpet_msb_inc, NULL); if (ret != 0) { RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n"); internal_config.no_hpet = 1; diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index c68dc38e..0516b159 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -345,46 +345,13 @@ get_vfio_cfg_by_group_num(int iommu_group_num) return NULL; } -static struct vfio_config * -get_vfio_cfg_by_group_fd(int vfio_group_fd) -{ - struct vfio_config *vfio_cfg; - int i, j; - - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { - vfio_cfg = &vfio_cfgs[i]; - for (j = 0; j < VFIO_MAX_GROUPS; j++) - if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) - return vfio_cfg; - } - - return NULL; -} - -static struct vfio_config * -get_vfio_cfg_by_container_fd(int container_fd) -{ - int i; - - for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { - if (vfio_cfgs[i].vfio_container_fd == container_fd) - return &vfio_cfgs[i]; - } - - return NULL; -} - -int -rte_vfio_get_group_fd(int iommu_group_num) +static int +vfio_get_group_fd(struct vfio_config *vfio_cfg, + int iommu_group_num) { int i; int vfio_group_fd; struct vfio_group *cur_grp; - struct vfio_config *vfio_cfg; - - /* get the vfio_config it belongs to */ - vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); - vfio_cfg = vfio_cfg ? 
vfio_cfg : default_vfio_cfg; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -423,6 +390,47 @@ rte_vfio_get_group_fd(int iommu_group_num) return vfio_group_fd; } +static struct vfio_config * +get_vfio_cfg_by_group_fd(int vfio_group_fd) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) + if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) + return vfio_cfg; + } + + return NULL; +} + +static struct vfio_config * +get_vfio_cfg_by_container_fd(int container_fd) +{ + int i; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == container_fd) + return &vfio_cfgs[i]; + } + + return NULL; +} + +int +rte_vfio_get_group_fd(int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; + + return vfio_get_group_fd(vfio_cfg, iommu_group_num); +} + static int get_vfio_group_idx(int vfio_group_fd) { @@ -509,7 +517,7 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, msl = rte_mem_virt2memseg_list(addr); /* for IOVA as VA mode, no need to care for IOVA addresses */ - if (rte_eal_iova_mode() == RTE_IOVA_VA) { + if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) { uint64_t vfio_va = (uint64_t)(uintptr_t)addr; if (type == RTE_MEM_EVENT_ALLOC) vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, @@ -523,13 +531,19 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, /* memsegs are contiguous in memory */ ms = rte_mem_virt2memseg(addr, msl); while (cur_len < len) { + /* some memory segments may have invalid IOVA */ + if (ms->iova == RTE_BAD_IOVA) { + RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n", + ms->addr); + goto next; + } if (type == RTE_MEM_EVENT_ALLOC) vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, ms->iova, ms->len, 1); else vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, ms->iova, ms->len, 0); - +next: cur_len += ms->len; ++ms; } @@ -896,7 +910,15 @@ rte_vfio_enable(const char *modname) return 0; } - default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd(); + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* open a new container */ + default_vfio_cfg->vfio_container_fd = + rte_vfio_get_container_fd(); + } else { + /* get the default container from the primary process */ + default_vfio_cfg->vfio_container_fd = + vfio_get_default_container_fd(); + } /* check if we have VFIO driver enabled */ if (default_vfio_cfg->vfio_container_fd != -1) { @@ -916,6 +938,45 @@ rte_vfio_is_enabled(const char *modname) return default_vfio_cfg->vfio_enabled && mod_available; } +int +vfio_get_default_container_fd(void) +{ + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + + if (default_vfio_cfg->vfio_enabled) + return default_vfio_cfg->vfio_container_fd; + + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* if we were secondary process we would try requesting + * container fd from the primary, but we're the primary + * process so just exit here + */ + return -1; + } + + p->req = SOCKET_REQ_DEFAULT_CONTAINER; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + if 
(rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + free(mp_reply.msgs); + return mp_rep->fds[0]; + } + free(mp_reply.msgs); + } + + RTE_LOG(ERR, EAL, " cannot request default container fd\n"); + return -1; +} + const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd) { @@ -1028,8 +1089,9 @@ rte_vfio_get_container_fd(void) mp_rep = &mp_reply.msgs[0]; p = (struct vfio_mp_param *)mp_rep->param; if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + vfio_container_fd = mp_rep->fds[0]; free(mp_reply.msgs); - return mp_rep->fds[0]; + return vfio_container_fd; } free(mp_reply.msgs); } @@ -1082,11 +1144,14 @@ rte_vfio_get_group_num(const char *sysfs_base, } static int -type1_map(const struct rte_memseg_list *msl __rte_unused, - const struct rte_memseg *ms, void *arg) +type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) { int *vfio_container_fd = arg; + if (msl->external) + return 0; + return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, ms->len, 1); } @@ -1145,8 +1210,22 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, struct vfio_iommu_type1_dma_map dma_map; struct vfio_iommu_type1_dma_unmap dma_unmap; int ret; + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + reg.vaddr = (uintptr_t) vaddr; + reg.size = len; if (do_map != 0) { + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, ®); + if (ret) { + RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = vaddr; @@ -1163,13 +1242,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, } } else { - struct vfio_iommu_spapr_register_memory reg = { - .argsz = sizeof(reg), - .flags = 0 - }; - reg.vaddr = (uintptr_t) vaddr; - reg.size = len; - ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, ®); if (ret) { @@ -1196,12 +1268,15 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, } static int -vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused, +vfio_spapr_map_walk(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg) { int *vfio_container_fd = arg; - return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, + if (msl->external) + return 0; + + return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova, ms->len, 1); } @@ -1210,12 +1285,15 @@ struct spapr_walk_param { uint64_t hugepage_sz; }; static int -vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused, +vfio_spapr_window_size_walk(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg) { struct spapr_walk_param *param = arg; uint64_t max = ms->iova + ms->len; + if (msl->external) + return 0; + if (max > param->window_size) { param->hugepage_sz = ms->hugepage_sz; param->window_size = max; @@ -1670,9 +1748,6 @@ int rte_vfio_container_group_bind(int container_fd, int iommu_group_num) { struct vfio_config *vfio_cfg; - struct vfio_group *cur_grp; - int vfio_group_fd; - int i; vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); if (vfio_cfg == NULL) { @@ -1680,36 +1755,7 @@ rte_vfio_container_group_bind(int container_fd, int 
iommu_group_num) return -1; } - /* Check room for new group */ - if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - return -1; - } - - /* Get an index for the new group */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg->vfio_groups[i].group_num == -1) { - cur_grp = &vfio_cfg->vfio_groups[i]; - break; - } - - /* This should not happen */ - if (i == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); - return -1; - } - - vfio_group_fd = vfio_open_group_fd(iommu_group_num); - if (vfio_group_fd < 0) { - RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); - return -1; - } - cur_grp->group_num = iommu_group_num; - cur_grp->fd = vfio_group_fd; - cur_grp->devices = 0; - vfio_cfg->vfio_active_groups++; - - return vfio_group_fd; + return vfio_get_group_fd(vfio_cfg, iommu_group_num); } int diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 68d4750a..63ae115c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -115,6 +115,9 @@ struct vfio_iommu_type { vfio_dma_func_t dma_map_func; }; +/* get the vfio container that devices are bound to by default */ +int vfio_get_default_container_fd(void); + /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */ const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd); @@ -129,6 +132,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 +#define SOCKET_REQ_DEFAULT_CONTAINER 0x400 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index 680a24aa..a1e8c834 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -66,6 +66,17 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) reply.fds[0] = fd; } break; + case SOCKET_REQ_DEFAULT_CONTAINER: + r->req = SOCKET_REQ_DEFAULT_CONTAINER; + fd = vfio_get_default_container_fd(); + if (fd < 0) + r->result = SOCKET_ERR; + else { + r->result = SOCKET_OK; + reply.num_fds = 1; + reply.fds[0] = fd; + } + break; default: RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h index cfa9448b..5afa0871 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -8,6 +8,7 @@ #ifdef __KERNEL__ #include <linux/if.h> +#include <asm/barrier.h> #define RTE_STD_C11 #else #include <rte_common.h> @@ -54,8 +55,13 @@ struct rte_kni_request { * Writing should never overwrite the read position */ struct rte_kni_fifo { +#ifdef RTE_USE_C11_MEM_MODEL + unsigned write; /**< Next position to be written*/ + unsigned read; /**< Next position to be read */ +#else volatile unsigned write; /**< Next position to be written*/ volatile unsigned read; /**< Next position to be read */ +#endif unsigned len; /**< Circular buffer length */ unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ void *volatile buffer[]; /**< The buffer contains mbuf pointers */ diff --git a/lib/librte_eal/meson.build b/lib/librte_eal/meson.build index e1fde15d..a18f3a82 100644 --- a/lib/librte_eal/meson.build +++ b/lib/librte_eal/meson.build @@ -21,11 +21,10 
@@ else error('unsupported system type "@0@"'.format(host_machine.system())) endif -version = 8 # the version of the EAL API +version = 9 # the version of the EAL API allow_experimental_apis = true deps += 'compat' deps += 'kvargs' -cflags += '-D_GNU_SOURCE' sources = common_sources + env_sources objs = common_objs + env_objs headers = common_headers + env_headers diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 344a43d3..04f62424 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -19,9 +19,6 @@ DPDK_2.0 { rte_dump_tailq; rte_eal_alarm_cancel; rte_eal_alarm_set; - rte_eal_devargs_add; - rte_eal_devargs_dump; - rte_eal_devargs_type_count; rte_eal_get_configuration; rte_eal_get_lcore_state; rte_eal_get_physmem_size; @@ -32,7 +29,6 @@ DPDK_2.0 { rte_eal_lcore_role; rte_eal_mp_remote_launch; rte_eal_mp_wait_lcore; - rte_eal_parse_devargs_str; rte_eal_process_type; rte_eal_remote_launch; rte_eal_tailq_lookup; @@ -134,8 +130,6 @@ DPDK_16.11 { rte_delay_us_block; rte_delay_us_callback_register; - rte_eal_dev_attach; - rte_eal_dev_detach; } DPDK_16.07; @@ -262,6 +256,16 @@ DPDK_18.08 { } DPDK_18.05; +DPDK_18.11 { + global: + + rte_eal_get_runtime_dir; + rte_eal_hotplug_add; + rte_eal_hotplug_remove; + rte_strscpy; + +} DPDK_18.08; + EXPERIMENTAL { global: @@ -270,12 +274,19 @@ EXPERIMENTAL { rte_class_register; rte_class_unregister; rte_ctrl_thread_create; + rte_delay_us_sleep; + rte_dev_event_callback_process; rte_dev_event_callback_register; rte_dev_event_callback_unregister; rte_dev_event_monitor_start; rte_dev_event_monitor_stop; + rte_dev_hotplug_handle_disable; + rte_dev_hotplug_handle_enable; + rte_dev_is_probed; rte_dev_iterator_init; rte_dev_iterator_next; + rte_dev_probe; + rte_dev_remove; rte_devargs_add; rte_devargs_dump; rte_devargs_insert; @@ -284,9 +295,8 @@ EXPERIMENTAL { rte_devargs_parsef; rte_devargs_remove; rte_devargs_type_count; + rte_eal_check_dma_mask; rte_eal_cleanup; - rte_eal_hotplug_add; - rte_eal_hotplug_remove; rte_fbarray_attach; rte_fbarray_destroy; rte_fbarray_detach; @@ -311,6 +321,14 @@ EXPERIMENTAL { rte_fbarray_set_used; rte_log_register_type_and_pick_level; rte_malloc_dump_heaps; + rte_malloc_heap_create; + rte_malloc_heap_destroy; + rte_malloc_heap_get_socket; + rte_malloc_heap_memory_add; + rte_malloc_heap_memory_attach; + rte_malloc_heap_memory_detach; + rte_malloc_heap_memory_remove; + rte_malloc_heap_socket_is_external; rte_mem_alloc_validator_register; rte_mem_alloc_validator_unregister; rte_mem_event_callback_register; @@ -320,6 +338,10 @@ EXPERIMENTAL { rte_mem_virt2memseg_list; rte_memseg_contig_walk; rte_memseg_contig_walk_thread_unsafe; + rte_memseg_get_fd; + rte_memseg_get_fd_offset; + rte_memseg_get_fd_thread_unsafe; + rte_memseg_get_fd_offset_thread_unsafe; rte_memseg_list_walk; rte_memseg_list_walk_thread_unsafe; rte_memseg_walk; @@ -330,6 +352,7 @@ EXPERIMENTAL { rte_mp_request_sync; rte_mp_request_async; rte_mp_sendmsg; + rte_option_register; rte_service_lcore_attr_get; rte_service_lcore_attr_reset_all; rte_service_may_be_active; diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile index 0935a275..3e27ae46 100644 --- a/lib/librte_ethdev/Makefile +++ b/lib/librte_ethdev/Makefile @@ -12,13 +12,15 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring -LDLIBS += -lrte_mbuf +LDLIBS += -lrte_mbuf -lrte_kvargs -lrte_cmdline EXPORT_MAP := rte_ethdev_version.map -LIBABIVER := 
10 +LIBABIVER := 11 +SRCS-y += ethdev_private.c SRCS-y += rte_ethdev.c +SRCS-y += rte_class_eth.c SRCS-y += rte_flow.c SRCS-y += rte_tm.c SRCS-y += rte_mtr.c diff --git a/lib/librte_ethdev/ethdev_private.c b/lib/librte_ethdev/ethdev_private.c new file mode 100644 index 00000000..162a502f --- /dev/null +++ b/lib/librte_ethdev/ethdev_private.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Gaëtan Rivet + */ + +#include "rte_ethdev.h" +#include "rte_ethdev_driver.h" +#include "ethdev_private.h" + +uint16_t +eth_dev_to_id(const struct rte_eth_dev *dev) +{ + if (dev == NULL) + return RTE_MAX_ETHPORTS; + return dev - rte_eth_devices; +} + +struct rte_eth_dev * +eth_find_device(const struct rte_eth_dev *start, rte_eth_cmp_t cmp, + const void *data) +{ + struct rte_eth_dev *edev; + ptrdiff_t idx; + + /* Avoid Undefined Behaviour */ + if (start != NULL && + (start < &rte_eth_devices[0] || + start > &rte_eth_devices[RTE_MAX_ETHPORTS])) + return NULL; + if (start != NULL) + idx = eth_dev_to_id(start) + 1; + else + idx = 0; + for (; idx < RTE_MAX_ETHPORTS; idx++) { + edev = &rte_eth_devices[idx]; + if (cmp(edev, data) == 0) + return edev; + } + return NULL; +} + +int +rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback, + void *data) +{ + char *str_start; + int state; + int result; + + if (*str != '[') + /* Single element, not a list */ + return callback(str, data); + + /* Sanity check, then strip the brackets */ + str_start = &str[strlen(str) - 1]; + if (*str_start != ']') { + RTE_LOG(ERR, EAL, "(%s): List does not end with ']'\n", str); + return -EINVAL; + } + str++; + *str_start = '\0'; + + /* Process list elements */ + state = 0; + while (1) { + if (state == 0) { + if (*str == '\0') + break; + if (*str != ',') { + str_start = str; + state = 1; + } + } else if (state == 1) { + if (*str == ',' || *str == '\0') { + if (str > str_start) { + /* Non-empty string fragment */ + *str = '\0'; + result = callback(str_start, data); + if (result < 0) + return result; + } + state = 0; + } + } + str++; + } + return 0; +} + +static int +rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list, + const uint16_t max_list) +{ + uint16_t lo, hi, val; + int result; + + result = sscanf(str, "%hu-%hu", &lo, &hi); + if (result == 1) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = lo; + } else if (result == 2) { + if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS) + return -EINVAL; + for (val = lo; val <= hi; val++) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = val; + } + } else + return -EINVAL; + return 0; +} + +int +rte_eth_devargs_parse_representor_ports(char *str, void *data) +{ + struct rte_eth_devargs *eth_da = data; + + return rte_eth_devargs_process_range(str, eth_da->representor_ports, + ð_da->nb_representor_ports, RTE_MAX_ETHPORTS); +} diff --git a/lib/librte_ethdev/ethdev_private.h b/lib/librte_ethdev/ethdev_private.h new file mode 100644 index 00000000..7b787bf9 --- /dev/null +++ b/lib/librte_ethdev/ethdev_private.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Gaëtan Rivet + */ + +#ifndef _RTE_ETH_PRIVATE_H_ +#define _RTE_ETH_PRIVATE_H_ + +#include "rte_ethdev.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Convert rte_eth_dev pointer to port id. + * NULL will be translated to RTE_MAX_ETHPORTS. + */ +uint16_t eth_dev_to_id(const struct rte_eth_dev *dev); + +/* Generic rte_eth_dev comparison function. 
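The representor value handled by the parser above is either a single id, a range, or a bracketed comma-separated list of both, e.g. representor=[0,2-4]. A simplified stand-alone sketch of the range-expansion step (the real parser additionally strips the surrounding brackets, walks comma-separated fragments, and bounds everything by RTE_MAX_ETHPORTS):

#include <stdio.h>

/* expand "lo-hi" or a single "n" into ids[]; returns the count, or -1 */
static int
expand_range(const char *str, unsigned short *ids, int max)
{
	unsigned short lo, hi;
	int i, n = 0;
	int matched = sscanf(str, "%hu-%hu", &lo, &hi);

	if (matched == 1)
		hi = lo;                  /* single value */
	else if (matched != 2 || lo > hi)
		return -1;                /* malformed or inverted range */

	for (i = 0; i <= hi - lo && n < max; i++)
		ids[n++] = (unsigned short)(lo + i);
	return n;
}

int main(void)
{
	unsigned short ids[16];
	int i, n = expand_range("2-4", ids, 16);

	for (i = 0; i < n; i++)
		printf("representor port %hu\n", ids[i]);  /* 2, 3, 4 */
	return 0;
}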
*/ +typedef int (*rte_eth_cmp_t)(const struct rte_eth_dev *, const void *); + +/* Generic rte_eth_dev iterator. */ +struct rte_eth_dev * +eth_find_device(const struct rte_eth_dev *_start, rte_eth_cmp_t cmp, + const void *data); + +/* Parse devargs value for representor parameter. */ +typedef int (*rte_eth_devargs_callback_t)(char *str, void *data); +int rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback, + void *data); +int rte_eth_devargs_parse_representor_ports(char *str, void *data); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETH_PRIVATE_H_ */ diff --git a/lib/librte_ethdev/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c index 0d1dcda3..a3c303f6 100644 --- a/lib/librte_ethdev/ethdev_profile.c +++ b/lib/librte_ethdev/ethdev_profile.c @@ -1,87 +1,33 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #include "ethdev_profile.h" /** - * This conditional block enables RX queues profiling by tracking wasted - * iterations, i.e. iterations which yielded no RX packets. Profiling is - * performed using the Instrumentation and Tracing Technology (ITT) API, - * employed by the Intel (R) VTune (TM) Amplifier. + * This conditional block enables Ethernet device profiling with + * Intel (R) VTune (TM) Amplifier. */ -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - -#include <ittnotify.h> - -#define ITT_MAX_NAME_LEN (100) - -/** - * Auxiliary ITT structure belonging to Ethernet device and using to: - * - track RX queue state to determine whether it is wasting loop iterations - * - begin or end ITT task using task domain and task name (handle) - */ -struct itt_profile_rx_data { - /** - * ITT domains for each queue. - */ - __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; - /** - * ITT task names for each queue. - */ - __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; - /** - * Flags indicating the queues state. Possible values: - * 1 - queue is wasting iterations, - * 0 - otherwise. - */ - uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; -}; - -/** - * The pool of *itt_profile_rx_data* structures. - */ -struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; - +#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE /** - * This callback function manages ITT tasks collection on given port and queue. - * It must be registered with rte_eth_add_rx_callback() to be called from - * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function - * type declaration. + * Hook callback to trace rte_eth_rx_burst() calls. */ -static uint16_t -collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, +uint16_t +profile_hook_rx_burst_cb( + __rte_unused uint16_t port_id, __rte_unused uint16_t queue_id, __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, __rte_unused uint16_t max_pkts, __rte_unused void *user_param) { - if (unlikely(nb_pkts == 0)) { - if (!itt_rx_data[port_id].queue_state[queue_id]) { - __itt_task_begin( - itt_rx_data[port_id].domains[queue_id], - __itt_null, __itt_null, - itt_rx_data[port_id].handles[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 1; - } - } else { - if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { - __itt_task_end( - itt_rx_data[port_id].domains[queue_id]); - itt_rx_data[port_id].queue_state[queue_id] = 0; - } - } return nb_pkts; } /** - * Initialization of itt_profile_rx_data for a given Ethernet device. + * Setting profiling rx callback for a given Ethernet device. 
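The VTune hook above is an ordinary RX callback: it is attached per RX queue and runs on every rte_eth_rx_burst() return, which is also how an application can add its own lightweight instrumentation. A minimal sketch of registering such a callback, using the same rte_eth_add_rx_callback() call as the profiling init below (assuming RX/TX callbacks are enabled in the build):

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_errno.h>

static uint64_t idle_polls; /* polls that returned no packets */

static uint16_t
count_idle_polls(uint16_t port_id __rte_unused, uint16_t queue_id __rte_unused,
		struct rte_mbuf *pkts[] __rte_unused, uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused, void *user_param __rte_unused)
{
	if (nb_pkts == 0)
		idle_polls++;
	return nb_pkts;   /* observe only, never drop packets */
}

/* call once per RX queue after the port has been configured */
static int
attach_idle_counter(uint16_t port_id, uint16_t queue_id)
{
	if (rte_eth_add_rx_callback(port_id, queue_id,
			count_idle_polls, NULL) == NULL)
		return -rte_errno;
	return 0;
}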
* This function must be invoked when ethernet device is being configured. - * Result will be stored in the global array *itt_rx_data*. * * @param port_id * The port identifier of the Ethernet device. - * @param port_name - * The name of the Ethernet device. * @param rx_queue_num * The number of RX queues on specified port. * @@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, * - On failure, a negative value. */ static inline int -itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) +vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num) { uint16_t q_id; for (q_id = 0; q_id < rx_queue_num; ++q_id) { - char domain_name[ITT_MAX_NAME_LEN]; - - snprintf(domain_name, sizeof(domain_name), - "RXBurst.WastedIterations.Port_%s.Queue_%d", - port_name, q_id); - itt_rx_data[port_id].domains[q_id] - = __itt_domain_create(domain_name); - - char task_name[ITT_MAX_NAME_LEN]; - - snprintf(task_name, sizeof(task_name), - "port id: %d; queue id: %d", - port_id, q_id); - itt_rx_data[port_id].handles[q_id] - = __itt_string_handle_create(task_name); - - itt_rx_data[port_id].queue_state[q_id] = 0; - if (!rte_eth_add_rx_callback( - port_id, q_id, collect_itt_rx_burst_cb, NULL)) { + port_id, q_id, profile_hook_rx_burst_cb, NULL)) { return -rte_errno; } } return 0; } -#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ +#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */ int -__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, +__rte_eth_dev_profile_init(__rte_unused uint16_t port_id, __rte_unused struct rte_eth_dev *dev) { -#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS - return itt_profile_rx_init( - port_id, dev->data->name, dev->data->nb_rx_queues); +#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE + return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues); #endif return 0; } diff --git a/lib/librte_ethdev/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h index e5ea3682..65031e6f 100644 --- a/lib/librte_ethdev/ethdev_profile.h +++ b/lib/librte_ethdev/ethdev_profile.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #ifndef _RTE_ETHDEV_PROFILE_H_ @@ -8,7 +8,7 @@ #include "rte_ethdev.h" /** - * Initialization of profiling RX queues for the Ethernet device. + * Initialization of the Ethernet device profiling. * Implementation of this function depends on chosen profiling method, * defined in configs. * @@ -22,6 +22,6 @@ * - On failure, a negative value. 
*/ int -__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); +__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev); #endif diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build index 596cd0f3..a4d85026 100644 --- a/lib/librte_ethdev/meson.build +++ b/lib/librte_ethdev/meson.build @@ -2,9 +2,11 @@ # Copyright(c) 2017 Intel Corporation name = 'ethdev' -version = 10 +version = 11 allow_experimental_apis = true -sources = files('ethdev_profile.c', +sources = files('ethdev_private.c', + 'ethdev_profile.c', + 'rte_class_eth.c', 'rte_ethdev.c', 'rte_flow.c', 'rte_mtr.c', @@ -24,4 +26,4 @@ headers = files('rte_ethdev.h', 'rte_tm.h', 'rte_tm_driver.h') -deps += ['net', 'kvargs'] +deps += ['net', 'kvargs', 'cmdline'] diff --git a/lib/librte_ethdev/rte_class_eth.c b/lib/librte_ethdev/rte_class_eth.c new file mode 100644 index 00000000..cb99c92e --- /dev/null +++ b/lib/librte_ethdev/rte_class_eth.c @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Gaëtan Rivet + */ + +#include <string.h> + +#include <cmdline_parse_etheraddr.h> +#include <rte_class.h> +#include <rte_compat.h> +#include <rte_errno.h> +#include <rte_kvargs.h> +#include <rte_log.h> + +#include "rte_ethdev.h" +#include "rte_ethdev_core.h" +#include "rte_ethdev_driver.h" +#include "ethdev_private.h" + +enum eth_params { + RTE_ETH_PARAM_MAC, + RTE_ETH_PARAM_REPRESENTOR, + RTE_ETH_PARAM_MAX, +}; + +static const char * const eth_params_keys[] = { + [RTE_ETH_PARAM_MAC] = "mac", + [RTE_ETH_PARAM_REPRESENTOR] = "representor", + [RTE_ETH_PARAM_MAX] = NULL, +}; + +struct eth_dev_match_arg { + struct rte_device *device; + struct rte_kvargs *kvlist; +}; + +#define eth_dev_match_arg(d, k) \ + (&(const struct eth_dev_match_arg) { \ + .device = (d), \ + .kvlist = (k), \ + }) + +static int +eth_mac_cmp(const char *key __rte_unused, + const char *value, void *opaque) +{ + int ret; + struct ether_addr mac; + const struct rte_eth_dev_data *data = opaque; + struct rte_eth_dev_info dev_info; + uint32_t index; + + /* Parse devargs MAC address. */ + /* + * cannot use ether_aton_r(value, &mac) + * because of include conflict with rte_ether.h + */ + ret = cmdline_parse_etheraddr(NULL, value, &mac, sizeof(mac)); + if (ret < 0) + return -1; /* invalid devargs value */ + + /* Return 0 if devargs MAC is matching one of the device MACs. */ + rte_eth_dev_info_get(data->port_id, &dev_info); + for (index = 0; index < dev_info.max_mac_addrs; index++) + if (is_same_ether_addr(&mac, &data->mac_addrs[index])) + return 0; + return -1; /* no match */ +} + +static int +eth_representor_cmp(const char *key __rte_unused, + const char *value, void *opaque) +{ + int ret; + char *values; + const struct rte_eth_dev_data *data = opaque; + struct rte_eth_devargs representors; + uint16_t index; + + if ((data->dev_flags & RTE_ETH_DEV_REPRESENTOR) == 0) + return -1; /* not a representor port */ + + /* Parse devargs representor values. */ + values = strdup(value); + if (values == NULL) + return -1; + memset(&representors, 0, sizeof(representors)); + ret = rte_eth_devargs_parse_list(values, + rte_eth_devargs_parse_representor_ports, + &representors); + free(values); + if (ret != 0) + return -1; /* invalid devargs value */ + + /* Return 0 if representor id is matching one of the values. 
*/ + for (index = 0; index < representors.nb_representor_ports; index++) + if (data->representor_id == + representors.representor_ports[index]) + return 0; + return -1; /* no match */ +} + +static int +eth_dev_match(const struct rte_eth_dev *edev, + const void *_arg) +{ + int ret; + const struct eth_dev_match_arg *arg = _arg; + const struct rte_kvargs *kvlist = arg->kvlist; + unsigned int pair; + + if (edev->state == RTE_ETH_DEV_UNUSED) + return -1; + if (arg->device != NULL && arg->device != edev->device) + return -1; + + ret = rte_kvargs_process(kvlist, + eth_params_keys[RTE_ETH_PARAM_MAC], + eth_mac_cmp, edev->data); + if (ret != 0) + return -1; + + ret = rte_kvargs_process(kvlist, + eth_params_keys[RTE_ETH_PARAM_REPRESENTOR], + eth_representor_cmp, edev->data); + if (ret != 0) + return -1; + /* search for representor key */ + for (pair = 0; pair < kvlist->count; pair++) { + ret = strcmp(kvlist->pairs[pair].key, + eth_params_keys[RTE_ETH_PARAM_REPRESENTOR]); + if (ret == 0) + break; /* there is a representor key */ + } + /* if no representor key, default is to not match representor ports */ + if (ret != 0) + if ((edev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR) != 0) + return -1; /* do not match any representor */ + + return 0; +} + +static void * +eth_dev_iterate(const void *start, + const char *str, + const struct rte_dev_iterator *it) +{ + struct rte_kvargs *kvargs = NULL; + struct rte_eth_dev *edev = NULL; + const char * const *valid_keys = NULL; + + if (str != NULL) { + if (str[0] == '+') /* no validation of keys */ + str++; + else + valid_keys = eth_params_keys; + kvargs = rte_kvargs_parse(str, valid_keys); + if (kvargs == NULL) { + RTE_LOG(ERR, EAL, "cannot parse argument list\n"); + rte_errno = EINVAL; + return NULL; + } + } + edev = eth_find_device(start, eth_dev_match, + eth_dev_match_arg(it->device, kvargs)); + rte_kvargs_free(kvargs); + return edev; +} + +static struct rte_class rte_class_eth = { + .dev_iterate = eth_dev_iterate, +}; + +RTE_REGISTER_CLASS(eth, rte_class_eth); diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 4c320250..9d348138 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -36,11 +36,13 @@ #include <rte_spinlock.h> #include <rte_string_fns.h> #include <rte_kvargs.h> +#include <rte_class.h> #include "rte_ether.h" #include "rte_ethdev.h" #include "rte_ethdev_driver.h" #include "ethdev_profile.h" +#include "ethdev_private.h" int rte_eth_dev_logtype; @@ -122,11 +124,12 @@ static const struct { RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER), RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND), RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME), - RTE_RX_OFFLOAD_BIT2STR(CRC_STRIP), RTE_RX_OFFLOAD_BIT2STR(SCATTER), RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP), RTE_RX_OFFLOAD_BIT2STR(SECURITY), RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC), + RTE_RX_OFFLOAD_BIT2STR(SCTP_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM), }; #undef RTE_RX_OFFLOAD_BIT2STR @@ -156,6 +159,10 @@ static const struct { RTE_TX_OFFLOAD_BIT2STR(MULTI_SEGS), RTE_TX_OFFLOAD_BIT2STR(MBUF_FAST_FREE), RTE_TX_OFFLOAD_BIT2STR(SECURITY), + RTE_TX_OFFLOAD_BIT2STR(UDP_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(IP_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(MATCH_METADATA), }; #undef RTE_TX_OFFLOAD_BIT2STR @@ -180,6 +187,146 @@ enum { STAT_QMAP_RX }; +int __rte_experimental +rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str) +{ + int ret; + struct rte_devargs devargs = {.args = NULL}; + const char *bus_param_key; + char *bus_str = NULL; + char 
*cls_str = NULL; + int str_size; + + memset(iter, 0, sizeof(*iter)); + + /* + * The devargs string may use various syntaxes: + * - 0000:08:00.0,representor=[1-3] + * - pci:0000:06:00.0,representor=[0,5] + * - class=eth,mac=00:11:22:33:44:55 + * A new syntax is in development (not yet supported): + * - bus=X,paramX=x/class=Y,paramY=y/driver=Z,paramZ=z + */ + + /* + * Handle pure class filter (i.e. without any bus-level argument), + * from future new syntax. + * rte_devargs_parse() is not yet supporting the new syntax, + * that's why this simple case is temporarily parsed here. + */ +#define iter_anybus_str "class=eth," + if (strncmp(devargs_str, iter_anybus_str, + strlen(iter_anybus_str)) == 0) { + iter->cls_str = devargs_str + strlen(iter_anybus_str); + goto end; + } + + /* Split bus, device and parameters. */ + ret = rte_devargs_parse(&devargs, devargs_str); + if (ret != 0) + goto error; + + /* + * Assume parameters of old syntax can match only at ethdev level. + * Extra parameters will be ignored, thanks to "+" prefix. + */ + str_size = strlen(devargs.args) + 2; + cls_str = malloc(str_size); + if (cls_str == NULL) { + ret = -ENOMEM; + goto error; + } + ret = snprintf(cls_str, str_size, "+%s", devargs.args); + if (ret != str_size - 1) { + ret = -EINVAL; + goto error; + } + iter->cls_str = cls_str; + free(devargs.args); /* allocated by rte_devargs_parse() */ + devargs.args = NULL; + + iter->bus = devargs.bus; + if (iter->bus->dev_iterate == NULL) { + ret = -ENOTSUP; + goto error; + } + + /* Convert bus args to new syntax for use with new API dev_iterate. */ + if (strcmp(iter->bus->name, "vdev") == 0) { + bus_param_key = "name"; + } else if (strcmp(iter->bus->name, "pci") == 0) { + bus_param_key = "addr"; + } else { + ret = -ENOTSUP; + goto error; + } + str_size = strlen(bus_param_key) + strlen(devargs.name) + 2; + bus_str = malloc(str_size); + if (bus_str == NULL) { + ret = -ENOMEM; + goto error; + } + ret = snprintf(bus_str, str_size, "%s=%s", + bus_param_key, devargs.name); + if (ret != str_size - 1) { + ret = -EINVAL; + goto error; + } + iter->bus_str = bus_str; + +end: + iter->cls = rte_class_find_by_name("eth"); + return 0; + +error: + if (ret == -ENOTSUP) + RTE_LOG(ERR, EAL, "Bus %s does not support iterating.\n", + iter->bus->name); + free(devargs.args); + free(bus_str); + free(cls_str); + return ret; +} + +uint16_t __rte_experimental +rte_eth_iterator_next(struct rte_dev_iterator *iter) +{ + if (iter->cls == NULL) /* invalid ethdev iterator */ + return RTE_MAX_ETHPORTS; + + do { /* loop to try all matching rte_device */ + /* If not pure ethdev filter and */ + if (iter->bus != NULL && + /* not in middle of rte_eth_dev iteration, */ + iter->class_device == NULL) { + /* get next rte_device to try. */ + iter->device = iter->bus->dev_iterate( + iter->device, iter->bus_str, iter); + if (iter->device == NULL) + break; /* no more rte_device candidate */ + } + /* A device is matching bus part, need to check ethdev part. */ + iter->class_device = iter->cls->dev_iterate( + iter->class_device, iter->cls_str, iter); + if (iter->class_device != NULL) + return eth_dev_to_id(iter->class_device); /* match */ + } while (iter->bus != NULL); /* need to try next rte_device */ + + /* No more ethdev port to iterate. 
*/ + rte_eth_iterator_cleanup(iter); + return RTE_MAX_ETHPORTS; +} + +void __rte_experimental +rte_eth_iterator_cleanup(struct rte_dev_iterator *iter) +{ + if (iter->bus_str == NULL) + return; /* nothing to free in pure class filter */ + free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */ + free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */ + memset(iter, 0, sizeof(*iter)); +} + uint16_t rte_eth_find_next(uint16_t port_id) { @@ -366,13 +513,22 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) rte_eth_dev_shared_data_prepare(); - _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL); + if (eth_dev->state != RTE_ETH_DEV_UNUSED) + _rte_eth_dev_callback_process(eth_dev, + RTE_ETH_EVENT_DESTROY, NULL); rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); eth_dev->state = RTE_ETH_DEV_UNUSED; - memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data)); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_free(eth_dev->data->rx_queues); + rte_free(eth_dev->data->tx_queues); + rte_free(eth_dev->data->mac_addrs); + rte_free(eth_dev->data->hash_mac_addrs); + rte_free(eth_dev->data->dev_private); + memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data)); + } rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); @@ -393,11 +549,8 @@ static int rte_eth_is_valid_owner_id(uint64_t owner_id) { if (owner_id == RTE_ETH_DEV_NO_OWNER || - rte_eth_dev_shared_data->next_owner_id <= owner_id) { - RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n", - owner_id); + rte_eth_dev_shared_data->next_owner_id <= owner_id) return 0; - } return 1; } @@ -444,8 +597,12 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id, } if (!rte_eth_is_valid_owner_id(new_owner->id) && - !rte_eth_is_valid_owner_id(old_owner_id)) + !rte_eth_is_valid_owner_id(old_owner_id)) { + RTE_ETHDEV_LOG(ERR, + "Invalid owner old_id=%016"PRIx64" new_id=%016"PRIx64"\n", + old_owner_id, new_owner->id); return -EINVAL; + } port_owner = &rte_eth_devices[port_id].data->owner; if (port_owner->id != old_owner_id) { @@ -516,9 +673,13 @@ rte_eth_dev_owner_delete(const uint64_t owner_id) if (rte_eth_devices[port_id].data->owner.id == owner_id) memset(&rte_eth_devices[port_id].data->owner, 0, sizeof(struct rte_eth_dev_owner)); - RTE_ETHDEV_LOG(ERR, + RTE_ETHDEV_LOG(NOTICE, "All port owners owned by %016"PRIx64" identifier have removed\n", owner_id); + } else { + RTE_ETHDEV_LOG(ERR, + "Invalid owner id=%016"PRIx64"\n", + owner_id); } rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); @@ -642,87 +803,6 @@ eth_err(uint16_t port_id, int ret) return ret; } -/* attach the new device, then store port_id of the device */ -int -rte_eth_dev_attach(const char *devargs, uint16_t *port_id) -{ - int current = rte_eth_dev_count_total(); - struct rte_devargs da; - int ret = -1; - - memset(&da, 0, sizeof(da)); - - if ((devargs == NULL) || (port_id == NULL)) { - ret = -EINVAL; - goto err; - } - - /* parse devargs */ - if (rte_devargs_parse(&da, devargs)) - goto err; - - ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args); - if (ret < 0) - goto err; - - /* no point looking at the port count if no port exists */ - if (!rte_eth_dev_count_total()) { - RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name); - ret = -1; - goto err; - } - - /* if nothing happened, there is a bug here, since some driver told us - * it did attach a device, but did not create a port. 
- * FIXME: race condition in case of plug-out of another device - */ - if (current == rte_eth_dev_count_total()) { - ret = -1; - goto err; - } - - *port_id = eth_dev_last_created_port; - ret = 0; - -err: - free(da.args); - return ret; -} - -/* detach the device, then store the name of the device */ -int -rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused) -{ - struct rte_device *dev; - struct rte_bus *bus; - uint32_t dev_flags; - int ret = -1; - - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); - - dev_flags = rte_eth_devices[port_id].data->dev_flags; - if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { - RTE_ETHDEV_LOG(ERR, - "Port %"PRIu16" is bonded, cannot detach\n", port_id); - return -ENOTSUP; - } - - dev = rte_eth_devices[port_id].device; - if (dev == NULL) - return -EINVAL; - - bus = rte_bus_find_by_device(dev); - if (bus == NULL) - return -ENOENT; - - ret = rte_eal_hotplug_remove(bus->name, dev->name); - if (ret < 0) - return ret; - - rte_eth_dev_release_port(&rte_eth_devices[port_id]); - return 0; -} - static int rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) { @@ -974,7 +1054,7 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex) } } -const char * __rte_experimental +const char * rte_eth_dev_rx_offload_name(uint64_t offload) { const char *name = "UNKNOWN"; @@ -990,7 +1070,7 @@ rte_eth_dev_rx_offload_name(uint64_t offload) return name; } -const char * __rte_experimental +const char * rte_eth_dev_tx_offload_name(uint64_t offload) { const char *name = "UNKNOWN"; @@ -1142,14 +1222,6 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return -EINVAL; } - if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) && - (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) { - RTE_ETHDEV_LOG(ERR, - "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n", - port_id); - return -EINVAL; - } - /* Check that device supports requested rss hash functions. */ if ((dev_info.flow_type_rss_offloads | dev_conf->rx_adv_conf.rss_conf.rss_hf) != @@ -1191,9 +1263,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } /* Initialize Rx profiling if enabled at compilation time. 
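/*
 * Illustrative sketch (not part of this patch): with rte_eth_dev_attach()
 * and rte_eth_dev_detach() removed, an application uses the EAL hotplug
 * calls already seen above plus an ethdev lookup.  The PCI address is an
 * example, and the port-name lookup assumes the PMD names its port after
 * the underlying rte_device, which most PCI PMDs do.
 */
#include <rte_dev.h>
#include <rte_ethdev.h>

static int
plug_example_port(uint16_t *port_id)
{
	int ret;

	ret = rte_eal_hotplug_add("pci", "0000:01:00.0", "");
	if (ret < 0)
		return ret;
	return rte_eth_dev_get_port_by_name("0000:01:00.0", port_id);
}

static int
unplug_example_port(void)
{
	return rte_eal_hotplug_remove("pci", "0000:01:00.0");
}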
*/ - diag = __rte_eth_profile_rx_init(port_id, dev); + diag = __rte_eth_dev_profile_init(port_id, dev); if (diag != 0) { - RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n", + RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n", port_id, diag); rte_eth_dev_rx_queue_config(dev, 0); rte_eth_dev_tx_queue_config(dev, 0); @@ -1219,19 +1291,14 @@ _rte_eth_dev_reset(struct rte_eth_dev *dev) } static void -rte_eth_dev_config_restore(uint16_t port_id) +rte_eth_dev_mac_restore(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info) { - struct rte_eth_dev *dev; - struct rte_eth_dev_info dev_info; struct ether_addr *addr; uint16_t i; uint32_t pool = 0; uint64_t pool_mask; - dev = &rte_eth_devices[port_id]; - - rte_eth_dev_info_get(port_id, &dev_info); - /* replay MAC address configuration including default MAC */ addr = &dev->data->mac_addrs[0]; if (*dev->dev_ops->mac_addr_set != NULL) @@ -1240,7 +1307,7 @@ rte_eth_dev_config_restore(uint16_t port_id) (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); if (*dev->dev_ops->mac_addr_add != NULL) { - for (i = 1; i < dev_info.max_mac_addrs; i++) { + for (i = 1; i < dev_info->max_mac_addrs; i++) { addr = &dev->data->mac_addrs[i]; /* skip zero address */ @@ -1259,6 +1326,14 @@ rte_eth_dev_config_restore(uint16_t port_id) } while (pool_mask); } } +} + +static void +rte_eth_dev_config_restore(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info, uint16_t port_id) +{ + if (!(*dev_info->dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR)) + rte_eth_dev_mac_restore(dev, dev_info); /* replay promiscuous configuration */ if (rte_eth_promiscuous_get(port_id) == 1) @@ -1277,6 +1352,7 @@ int rte_eth_dev_start(uint16_t port_id) { struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; int diag; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1292,13 +1368,19 @@ rte_eth_dev_start(uint16_t port_id) return 0; } + rte_eth_dev_info_get(port_id, &dev_info); + + /* Lets restore MAC now if device does not support live change */ + if (*dev_info.dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR) + rte_eth_dev_mac_restore(dev, &dev_info); + diag = (*dev->dev_ops->dev_start)(dev); if (diag == 0) dev->data->dev_started = 1; else return eth_err(port_id, diag); - rte_eth_dev_config_restore(port_id); + rte_eth_dev_config_restore(dev, &dev_info, port_id); if (dev->data->dev_conf.intr_conf.lsc == 0) { RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); @@ -1366,6 +1448,16 @@ rte_eth_dev_close(uint16_t port_id) dev->data->dev_started = 0; (*dev->dev_ops->dev_close)(dev); + /* check behaviour flag - temporary for PMD migration */ + if ((dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE) != 0) { + /* new behaviour: send event + reset state + free all data */ + rte_eth_dev_release_port(dev); + return; + } + RTE_ETHDEV_LOG(DEBUG, "Port closing is using an old behaviour.\n" + "The driver %s should migrate to the new behaviour.\n", + dev->device->driver->name); + /* old behaviour: only free queue arrays */ dev->data->nb_rx_queues = 0; rte_free(dev->data->rx_queues); dev->data->rx_queues = NULL; @@ -3425,6 +3517,43 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data) return 0; } +int __rte_experimental +rte_eth_dev_rx_intr_ctl_q_get_fd(uint16_t port_id, uint16_t queue_id) +{ + struct rte_intr_handle *intr_handle; + struct rte_eth_dev *dev; + unsigned int efd_idx; + uint32_t vec; + int fd; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); + + dev = &rte_eth_devices[port_id]; + + if (queue_id >= dev->data->nb_rx_queues) { + 
RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id); + return -1; + } + + if (!dev->intr_handle) { + RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n"); + return -1; + } + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n"); + return -1; + } + + vec = intr_handle->intr_vec[queue_id]; + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? + (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + fd = intr_handle->efds[efd_idx]; + + return fd; +} + const struct rte_memzone * rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, uint16_t queue_id, size_t size, unsigned align, @@ -3433,9 +3562,8 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, char z_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; - snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->device->driver->name, ring_name, - dev->data->port_id, queue_id); + snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s", + dev->data->port_id, queue_id, ring_name); mz = rte_memzone_lookup(z_name); if (mz) @@ -3459,10 +3587,8 @@ rte_eth_dev_create(struct rte_device *device, const char *name, if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ethdev = rte_eth_dev_allocate(name); - if (!ethdev) { - retval = -ENODEV; - goto probe_failed; - } + if (!ethdev) + return -ENODEV; if (priv_data_size) { ethdev->data->dev_private = rte_zmalloc_socket( @@ -3480,8 +3606,7 @@ rte_eth_dev_create(struct rte_device *device, const char *name, if (!ethdev) { RTE_LOG(ERR, EAL, "secondary process attach failed, " "ethdev doesn't exist"); - retval = -ENODEV; - goto probe_failed; + return -ENODEV; } } @@ -3505,13 +3630,9 @@ rte_eth_dev_create(struct rte_device *device, const char *name, rte_eth_dev_probing_finish(ethdev); return retval; -probe_failed: - /* free ports private data if primary process */ - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(ethdev->data->dev_private); +probe_failed: rte_eth_dev_release_port(ethdev); - return retval; } @@ -3532,11 +3653,6 @@ rte_eth_dev_destroy(struct rte_eth_dev *ethdev, return ret; } - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(ethdev->data->dev_private); - - ethdev->data->dev_private = NULL; - return rte_eth_dev_release_port(ethdev); } @@ -4195,7 +4311,7 @@ enum rte_eth_switch_domain_state { * RTE_MAX_ETHPORTS elements as there cannot be more active switch domains than * ethdev ports in a single process. 
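/*
 * Illustrative sketch (not part of this patch): the file descriptor returned
 * by the new rte_eth_dev_rx_intr_ctl_q_get_fd() helper above can be armed in
 * an application-owned epoll set.  Assumes the port was configured with
 * rx-queue interrupts (intr_conf.rxq = 1) and started.
 */
#include <unistd.h>
#include <sys/epoll.h>
#include <rte_ethdev.h>

static int
wait_on_rx_queue(uint16_t port_id, uint16_t queue_id, int timeout_ms)
{
	struct epoll_event ev = { .events = EPOLLIN };
	struct epoll_event out;
	int epfd, fd, n;

	fd = rte_eth_dev_rx_intr_ctl_q_get_fd(port_id, queue_id);
	if (fd < 0)
		return -1;
	epfd = epoll_create1(0);
	if (epfd < 0)
		return -1;
	ev.data.fd = fd;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
		close(epfd);
		return -1;
	}
	rte_eth_dev_rx_intr_enable(port_id, queue_id);
	n = epoll_wait(epfd, &out, 1, timeout_ms);
	rte_eth_dev_rx_intr_disable(port_id, queue_id);
	close(epfd);
	return n;	/* > 0 means the queue interrupt fired */
}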
*/ -struct rte_eth_dev_switch { +static struct rte_eth_dev_switch { enum rte_eth_switch_domain_state state; } rte_eth_switch_domains[RTE_MAX_ETHPORTS]; @@ -4236,8 +4352,6 @@ rte_eth_switch_domain_free(uint16_t domain_id) return 0; } -typedef int (*rte_eth_devargs_callback_t)(char *str, void *data); - static int rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in) { @@ -4302,89 +4416,6 @@ rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in) } } -static int -rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback, - void *data) -{ - char *str_start; - int state; - int result; - - if (*str != '[') - /* Single element, not a list */ - return callback(str, data); - - /* Sanity check, then strip the brackets */ - str_start = &str[strlen(str) - 1]; - if (*str_start != ']') { - RTE_LOG(ERR, EAL, "(%s): List does not end with ']'", str); - return -EINVAL; - } - str++; - *str_start = '\0'; - - /* Process list elements */ - state = 0; - while (1) { - if (state == 0) { - if (*str == '\0') - break; - if (*str != ',') { - str_start = str; - state = 1; - } - } else if (state == 1) { - if (*str == ',' || *str == '\0') { - if (str > str_start) { - /* Non-empty string fragment */ - *str = '\0'; - result = callback(str_start, data); - if (result < 0) - return result; - } - state = 0; - } - } - str++; - } - return 0; -} - -static int -rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list, - const uint16_t max_list) -{ - uint16_t lo, hi, val; - int result; - - result = sscanf(str, "%hu-%hu", &lo, &hi); - if (result == 1) { - if (*len_list >= max_list) - return -ENOMEM; - list[(*len_list)++] = lo; - } else if (result == 2) { - if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS) - return -EINVAL; - for (val = lo; val <= hi; val++) { - if (*len_list >= max_list) - return -ENOMEM; - list[(*len_list)++] = val; - } - } else - return -EINVAL; - return 0; -} - - -static int -rte_eth_devargs_parse_representor_ports(char *str, void *data) -{ - struct rte_eth_devargs *eth_da = data; - - return rte_eth_devargs_process_range(str, eth_da->representor_ports, - ð_da->nb_representor_ports, RTE_MAX_ETHPORTS); -} - int __rte_experimental rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da) { diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index 7070e9ab..769a6943 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -167,6 +167,85 @@ extern int rte_eth_dev_logtype; struct rte_mbuf; /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Initializes a device iterator. + * + * This iterator allows accessing a list of devices matching some devargs. + * + * @param iter + * Device iterator handle initialized by the function. + * The fields bus_str and cls_str might be dynamically allocated, + * and could be freed by calling rte_eth_iterator_cleanup(). + * + * @param devargs + * Device description string. + * + * @return + * 0 on successful initialization, negative otherwise. + */ +__rte_experimental +int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Iterates on devices with devargs filter. + * The ownership is not checked. + * + * The next port id is returned, and the iterator is updated. + * + * @param iter + * Device iterator handle initialized by rte_eth_iterator_init(). 
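/*
 * Illustrative sketch (not part of this patch): rte_eth_devargs_parse(),
 * kept above, is a driver-side helper; a PMD probe routine can use it to
 * expand a "representor=[...]" argument into an explicit port list.  The
 * devargs string shown in the comment is an example.
 */
#include <stdio.h>
#include <string.h>
#include <rte_ethdev_driver.h>

static int
print_representor_ports(const char *args)
{
	struct rte_eth_devargs eth_da;
	uint16_t i;
	int ret;

	memset(&eth_da, 0, sizeof(eth_da));
	ret = rte_eth_devargs_parse(args, &eth_da); /* e.g. "representor=[0,2-4]" */
	if (ret < 0)
		return ret;
	for (i = 0; i < eth_da.nb_representor_ports; i++)
		printf("representor port %u\n", eth_da.representor_ports[i]);
	return 0;
}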
+ * Some fields bus_str and cls_str might be freed when no more port is found, + * by calling rte_eth_iterator_cleanup(). + * + * @return + * A port id if found, RTE_MAX_ETHPORTS otherwise. + */ +__rte_experimental +uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Free some allocated fields of the iterator. + * + * This function is automatically called by rte_eth_iterator_next() + * on the last iteration (i.e. when no more matching port is found). + * + * It is safe to call this function twice; it will do nothing more. + * + * @param iter + * Device iterator handle initialized by rte_eth_iterator_init(). + * The fields bus_str and cls_str are freed if needed. + */ +__rte_experimental +void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter); + +/** + * Macro to iterate over all ethdev ports matching some devargs. + * + * If a break is done before the end of the loop, + * the function rte_eth_iterator_cleanup() must be called. + * + * @param id + * Iterated port id of type uint16_t. + * @param devargs + * Device parameters input as string of type char*. + * @param iter + * Iterator handle of type struct rte_dev_iterator, used internally. + */ +#define RTE_ETH_FOREACH_MATCHING_DEV(id, devargs, iter) \ + for (rte_eth_iterator_init(iter, devargs), \ + id = rte_eth_iterator_next(iter); \ + id != RTE_MAX_ETHPORTS; \ + id = rte_eth_iterator_next(iter)) + +/** * A structure used to retrieve statistics for an Ethernet port. * Not all statistics fields in struct rte_eth_stats are supported * by any type of network interface card (NIC). If any statistics @@ -870,12 +949,6 @@ struct rte_eth_conf { }; /** - * A structure used to retrieve the contextual information of - * an Ethernet device, such as the controlling driver of the device, - * its PCI context, etc... - */ - -/** * RX offload capabilities of a device. */ #define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 @@ -890,16 +963,13 @@ struct rte_eth_conf { #define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200 #define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400 #define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800 -#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000 #define DEV_RX_OFFLOAD_SCATTER 0x00002000 #define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000 #define DEV_RX_OFFLOAD_SECURITY 0x00008000 - -/** - * Invalid to set both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC - * No DEV_RX_OFFLOAD_CRC_STRIP flag means keep CRC - */ #define DEV_RX_OFFLOAD_KEEP_CRC 0x00010000 +#define DEV_RX_OFFLOAD_SCTP_CKSUM 0x00020000 +#define DEV_RX_OFFLOAD_OUTER_UDP_CKSUM 0x00040000 + #define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \ DEV_RX_OFFLOAD_UDP_CKSUM | \ DEV_RX_OFFLOAD_TCP_CKSUM) @@ -953,6 +1023,13 @@ struct rte_eth_conf { * for tunnel TSO. */ #define DEV_TX_OFFLOAD_IP_TNL_TSO 0x00080000 +/** Device supports outer UDP checksum */ +#define DEV_TX_OFFLOAD_OUTER_UDP_CKSUM 0x00100000 +/** + * Device supports match on metadata Tx offload.. + * Application must set PKT_TX_METADATA and mbuf metadata field. + */ +#define DEV_TX_OFFLOAD_MATCH_METADATA 0x00200000 #define RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP 0x00000001 /**< Device supports Rx queue setup after device started*/ @@ -1010,6 +1087,12 @@ struct rte_eth_switch_info { /** * Ethernet device information */ + +/** + * A structure used to retrieve the contextual information of + * an Ethernet device, such as the controlling driver of the + * device, etc... 
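/*
 * Illustrative sketch (not part of this patch): walking every port that
 * matches a devargs filter with the RTE_ETH_FOREACH_MATCHING_DEV macro
 * defined above.  The MAC value is an example; breaking out of the loop
 * early would require an explicit rte_eth_iterator_cleanup() call, as
 * documented.
 */
#include <stdio.h>
#include <rte_dev.h>
#include <rte_ethdev.h>

static void
list_matching_ports(void)
{
	struct rte_dev_iterator iterator;
	uint16_t port_id;

	RTE_ETH_FOREACH_MATCHING_DEV(port_id,
			"class=eth,mac=00:11:22:33:44:55", &iterator)
		printf("matched port %u\n", port_id);
	/* The loop ran to completion, so the iterator is already cleaned up. */
}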
+ */ struct rte_eth_dev_info { struct rte_device *device; /** Generic device information */ const char *driver_name; /**< Device Driver name. */ @@ -1260,6 +1343,11 @@ struct rte_eth_dev_owner { char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */ }; +/** + * Port is released (i.e. totally freed and data erased) on close. + * Temporary flag for PMD migration to new rte_eth_dev_close() behaviour. + */ +#define RTE_ETH_DEV_CLOSE_REMOVE 0x0001 /** Device supports link state interrupt */ #define RTE_ETH_DEV_INTR_LSC 0x0002 /** Device is a bonded slave */ @@ -1268,6 +1356,8 @@ struct rte_eth_dev_owner { #define RTE_ETH_DEV_INTR_RMV 0x0008 /** Device is port representor */ #define RTE_ETH_DEV_REPRESENTOR 0x0010 +/** Device does not support MAC change after started */ +#define RTE_ETH_DEV_NOLIVE_MAC_ADDR 0x0020 /** * Iterates over valid ethdev ports owned by a specific owner. @@ -1420,37 +1510,6 @@ uint16_t rte_eth_dev_count_avail(void); uint16_t __rte_experimental rte_eth_dev_count_total(void); /** - * Attach a new Ethernet device specified by arguments. - * - * @param devargs - * A pointer to a strings array describing the new device - * to be attached. The strings should be a pci address like - * '0000:01:00.0' or virtual device name like 'net_pcap0'. - * @param port_id - * A pointer to a port identifier actually attached. - * @return - * 0 on success and port_id is filled, negative on error - */ -__rte_deprecated -int rte_eth_dev_attach(const char *devargs, uint16_t *port_id); - -/** - * Detach a Ethernet device specified by port identifier. - * This function must be called when the device is in the - * closed state. - * - * @param port_id - * The port identifier of the device to detach. - * @param devname - * A pointer to a buffer that will be filled with the device name. - * This buffer must be at least RTE_DEV_NAME_MAX_LEN long. - * @return - * 0 on success and devname is filled, negative on error - */ -__rte_deprecated -int rte_eth_dev_detach(uint16_t port_id, char *devname); - -/** * Convert a numerical speed in Mbps to a bitmap flag that can be used in * the bitmap link_speeds of the struct rte_eth_conf * @@ -1464,9 +1523,6 @@ int rte_eth_dev_detach(uint16_t port_id, char *devname); uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get DEV_RX_OFFLOAD_* flag name. * * @param offload @@ -1474,12 +1530,9 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); * @return * Offload name or 'UNKNOWN' if the flag cannot be recognised. */ -const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload); +const char *rte_eth_dev_rx_offload_name(uint64_t offload); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get DEV_TX_OFFLOAD_* flag name. * * @param offload @@ -1487,7 +1540,7 @@ const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload); * @return * Offload name or 'UNKNOWN' if the flag cannot be recognised. */ -const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload); +const char *rte_eth_dev_tx_offload_name(uint64_t offload); /** * Configure an Ethernet device. @@ -1750,6 +1803,10 @@ int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); * The device start step is the last one and consists of setting the configured * offload features and in starting the transmit and the receive units of the * device. 
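/*
 * Illustrative sketch (not part of this patch): a PMD opts in to the
 * RTE_ETH_DEV_CLOSE_REMOVE and RTE_ETH_DEV_NOLIVE_MAC_ADDR flags defined
 * above at init/probe time (driver function name is hypothetical).
 */
#include <rte_ethdev_driver.h>

static void
example_pmd_set_flags(struct rte_eth_dev *eth_dev)
{
	/* Free all port data from rte_eth_dev_close(), new behaviour. */
	eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
	/* Have ethdev program the MAC address before dev_start is called. */
	eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
}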
+ * + * Device RTE_ETH_DEV_NOLIVE_MAC_ADDR flag causes MAC address to be set before + * PMD port start callback function is invoked. + * * On success, all basic functions exported by the Ethernet API (link status, * receive/transmit, and so on) can be invoked. * @@ -1797,8 +1854,8 @@ int rte_eth_dev_set_link_down(uint16_t port_id); /** * Close a stopped Ethernet device. The device cannot be restarted! - * The function frees all resources except for needed by the - * closed state. To free these resources, call rte_eth_dev_detach(). + * The function frees all port resources if the driver supports + * the flag RTE_ETH_DEV_CLOSE_REMOVE. * * @param port_id * The port identifier of the Ethernet device. @@ -2719,6 +2776,26 @@ int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, int epfd, int op, void *data); /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get interrupt fd per Rx queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (>=0) the interrupt fd associated to the requested Rx queue if + * successful. + * - (-1) on error. + */ +int __rte_experimental +rte_eth_dev_rx_intr_ctl_q_get_fd(uint16_t port_id, uint16_t queue_id); + +/** * Turn on the LED on the Ethernet device. * This function turns on the LED on the Ethernet device. * diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h index 33d12b3a..8f03f83f 100644 --- a/lib/librte_ethdev/rte_ethdev_core.h +++ b/lib/librte_ethdev/rte_ethdev_core.h @@ -539,7 +539,13 @@ struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ - struct rte_eth_dev_data *data; /**< Pointer to device data */ + /** + * Next two fields are per-device data but *data is shared between + * primary and secondary processes and *process_private is per-process + * private. The second one is managed by PMDs if necessary. + */ + struct rte_eth_dev_data *data; /**< Pointer to device data. */ + void *process_private; /**< Pointer to per-process device data. */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ struct rte_device *device; /**< Backing device */ struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ @@ -579,24 +585,30 @@ struct rte_eth_dev_data { struct rte_eth_dev_sriov sriov; /**< SRIOV data */ - void *dev_private; /**< PMD-specific private data */ - - struct rte_eth_link dev_link; - /**< Link-level information & status */ + void *dev_private; + /**< PMD-specific private data. + * @see rte_eth_dev_release_port() + */ + struct rte_eth_link dev_link; /**< Link-level information & status. */ struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ uint16_t mtu; /**< Maximum Transmission Unit. */ - uint32_t min_rx_buf_size; - /**< Common rx buffer size handled by all queues */ + /**< Common RX buffer size handled by all queues. */ uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ - struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ + struct ether_addr *mac_addrs; + /**< Device Ethernet link address. 
+ * @see rte_eth_dev_release_port() + */ uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; - /** bitmap array of associating Ethernet MAC addresses to pools */ - struct ether_addr* hash_mac_addrs; - /** Device Ethernet MAC addresses of hash filtering. */ + /**< Bitmap associating MAC addresses to pools. */ + struct ether_addr *hash_mac_addrs; + /**< Device Ethernet MAC addresses of hash filtering. + * @see rte_eth_dev_release_port() + */ uint16_t port_id; /**< Device [external] port identifier. */ + __extension__ uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ @@ -604,15 +616,19 @@ struct rte_eth_dev_data { dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ lro : 1; /**< RX LRO is ON(1) / OFF(0) */ uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; - /** Queues state: STARTED(1) / STOPPED(0) */ + /**< Queues state: STARTED(1) / STOPPED(0). */ uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; - /** Queues state: STARTED(1) / STOPPED(0) */ - uint32_t dev_flags; /**< Capabilities */ - enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ - int numa_node; /**< NUMA node connection */ + /**< Queues state: STARTED(1) / STOPPED(0). */ + uint32_t dev_flags; /**< Capabilities. */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough. */ + int numa_node; /**< NUMA node connection. */ struct rte_vlan_filter_conf vlan_filter_conf; - /**< VLAN filter configuration. */ + /**< VLAN filter configuration. */ struct rte_eth_dev_owner owner; /**< The port owner. */ + uint16_t representor_id; + /**< Switch-specific identifier. + * Valid if RTE_ETH_DEV_REPRESENTOR in dev_flags. + */ } __rte_cache_aligned; /** diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h index c6d9bc1a..c2ac2632 100644 --- a/lib/librte_ethdev/rte_ethdev_driver.h +++ b/lib/librte_ethdev/rte_ethdev_driver.h @@ -58,10 +58,17 @@ struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name); /** * @internal - * Release the specified ethdev port. + * Notify RTE_ETH_EVENT_DESTROY and release the specified ethdev port. + * + * The following PMD-managed data fields will be freed: + * - dev_private + * - mac_addrs + * - hash_mac_addrs + * If one of these fields should not be freed, + * it must be reset to NULL by the PMD, typically in dev_close method. * * @param eth_dev - * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * Device to be detached. * @return * - 0 on success, negative on error */ @@ -324,32 +331,6 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev); int __rte_experimental rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit); -/** - * PMD helper function to check if keeping CRC is requested - * - * @note - * When CRC_STRIP offload flag is removed and default behavior switch to - * strip CRC, as planned, this helper function is not that useful and will be - * removed. 
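/*
 * Illustrative sketch (not part of this patch): a PMD whose mac_addrs array
 * is not allocated with rte_malloc() keeps rte_eth_dev_release_port() from
 * freeing it by clearing the pointer in dev_close, as described above
 * (driver function name is hypothetical).
 */
#include <rte_ethdev_driver.h>

static void
example_pmd_dev_close(struct rte_eth_dev *dev)
{
	/* ... driver-specific shutdown first ... */

	/* mac_addrs points to device-owned memory; do not let ethdev free it. */
	dev->data->mac_addrs = NULL;
}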
In PMDs this function will be replaced with check: - * if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) - * - * @param rx_offloads - * offload bits to be applied - * - * @return - * Return positive if keeping CRC is requested, - * zero if stripping CRC is requested - */ -static inline int -rte_eth_dev_must_keep_crc(uint64_t rx_offloads) -{ - if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP) - return 0; - - /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */ - return 1; -} - #ifdef __cplusplus } #endif diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h index f652596f..23257e98 100644 --- a/lib/librte_ethdev/rte_ethdev_pci.h +++ b/lib/librte_ethdev/rte_ethdev_pci.h @@ -135,17 +135,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) static inline void rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) { - if (rte_eal_process_type() == RTE_PROC_PRIMARY) - rte_free(eth_dev->data->dev_private); - - eth_dev->data->dev_private = NULL; - - /* - * Secondary process will check the name to attach. - * Clear this field to avoid attaching a released ports. - */ - eth_dev->data->name[0] = '\0'; - eth_dev->device = NULL; eth_dev->intr_handle = NULL; diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map index 38f117f0..3560c288 100644 --- a/lib/librte_ethdev/rte_ethdev_version.map +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -8,14 +8,12 @@ DPDK_2.2 { rte_eth_allmulticast_get; rte_eth_dev_allocate; rte_eth_dev_allocated; - rte_eth_dev_attach; rte_eth_dev_callback_register; rte_eth_dev_callback_unregister; rte_eth_dev_close; rte_eth_dev_configure; rte_eth_dev_count; rte_eth_dev_default_mac_addr_set; - rte_eth_dev_detach; rte_eth_dev_filter_supported; rte_eth_dev_flow_ctrl_get; rte_eth_dev_flow_ctrl_set; @@ -220,6 +218,14 @@ DPDK_18.08 { } DPDK_18.05; +DPDK_18.11 { + global: + + rte_eth_dev_rx_offload_name; + rte_eth_dev_tx_offload_name; + +} DPDK_18.08; + EXPERIMENTAL { global: @@ -235,10 +241,13 @@ EXPERIMENTAL { rte_eth_dev_owner_new; rte_eth_dev_owner_set; rte_eth_dev_owner_unset; - rte_eth_dev_rx_offload_name; - rte_eth_dev_tx_offload_name; + rte_eth_dev_rx_intr_ctl_q_get_fd; + rte_eth_iterator_cleanup; + rte_eth_iterator_init; + rte_eth_iterator_next; rte_eth_switch_domain_alloc; rte_eth_switch_domain_free; + rte_flow_conv; rte_flow_expand_rss; rte_mtr_capabilities_get; rte_mtr_create; diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c index cff4b520..3277be1e 100644 --- a/lib/librte_ethdev/rte_flow.c +++ b/lib/librte_ethdev/rte_flow.c @@ -11,6 +11,7 @@ #include <rte_common.h> #include <rte_errno.h> #include <rte_branch_prediction.h> +#include <rte_string_fns.h> #include "rte_ethdev.h" #include "rte_flow_driver.h" #include "rte_flow.h" @@ -50,10 +51,15 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = { MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)), MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)), MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)), - MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), - MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)), MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)), + MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), + MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), + MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)), + MK_FLOW_ITEM(GTP, sizeof(struct rte_flow_item_gtp)), + MK_FLOW_ITEM(GTPC, sizeof(struct 
rte_flow_item_gtp)), + MK_FLOW_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)), + MK_FLOW_ITEM(ESP, sizeof(struct rte_flow_item_esp)), MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)), MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)), MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)), @@ -66,6 +72,8 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = { sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)), MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH, sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)), + MK_FLOW_ITEM(MARK, sizeof(struct rte_flow_item_mark)), + MK_FLOW_ITEM(META, sizeof(struct rte_flow_item_meta)), }; /** Generate flow_action[] entry. */ @@ -80,6 +88,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = { MK_FLOW_ACTION(END, 0), MK_FLOW_ACTION(VOID, 0), MK_FLOW_ACTION(PASSTHRU, 0), + MK_FLOW_ACTION(JUMP, sizeof(struct rte_flow_action_jump)), MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)), MK_FLOW_ACTION(FLAG, 0), MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)), @@ -90,6 +99,8 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = { MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)), MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)), MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)), + MK_FLOW_ACTION(METER, sizeof(struct rte_flow_action_meter)), + MK_FLOW_ACTION(SECURITY, sizeof(struct rte_flow_action_security)), MK_FLOW_ACTION(OF_SET_MPLS_TTL, sizeof(struct rte_flow_action_of_set_mpls_ttl)), MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0), @@ -109,6 +120,29 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = { sizeof(struct rte_flow_action_of_pop_mpls)), MK_FLOW_ACTION(OF_PUSH_MPLS, sizeof(struct rte_flow_action_of_push_mpls)), + MK_FLOW_ACTION(VXLAN_ENCAP, sizeof(struct rte_flow_action_vxlan_encap)), + MK_FLOW_ACTION(VXLAN_DECAP, 0), + MK_FLOW_ACTION(NVGRE_ENCAP, sizeof(struct rte_flow_action_vxlan_encap)), + MK_FLOW_ACTION(NVGRE_DECAP, 0), + MK_FLOW_ACTION(RAW_ENCAP, sizeof(struct rte_flow_action_raw_encap)), + MK_FLOW_ACTION(RAW_DECAP, sizeof(struct rte_flow_action_raw_decap)), + MK_FLOW_ACTION(SET_IPV4_SRC, + sizeof(struct rte_flow_action_set_ipv4)), + MK_FLOW_ACTION(SET_IPV4_DST, + sizeof(struct rte_flow_action_set_ipv4)), + MK_FLOW_ACTION(SET_IPV6_SRC, + sizeof(struct rte_flow_action_set_ipv6)), + MK_FLOW_ACTION(SET_IPV6_DST, + sizeof(struct rte_flow_action_set_ipv6)), + MK_FLOW_ACTION(SET_TP_SRC, + sizeof(struct rte_flow_action_set_tp)), + MK_FLOW_ACTION(SET_TP_DST, + sizeof(struct rte_flow_action_set_tp)), + MK_FLOW_ACTION(MAC_SWAP, 0), + MK_FLOW_ACTION(DEC_TTL, 0), + MK_FLOW_ACTION(SET_TTL, sizeof(struct rte_flow_action_set_ttl)), + MK_FLOW_ACTION(SET_MAC_SRC, sizeof(struct rte_flow_action_set_mac)), + MK_FLOW_ACTION(SET_MAC_DST, sizeof(struct rte_flow_action_set_mac)), }; static int @@ -288,26 +322,41 @@ rte_flow_error_set(struct rte_flow_error *error, } /** Pattern item specification types. */ -enum item_spec_type { - ITEM_SPEC, - ITEM_LAST, - ITEM_MASK, +enum rte_flow_conv_item_spec_type { + RTE_FLOW_CONV_ITEM_SPEC, + RTE_FLOW_CONV_ITEM_LAST, + RTE_FLOW_CONV_ITEM_MASK, }; -/** Compute storage space needed by item specification and copy it. */ +/** + * Copy pattern item specification. + * + * @param[out] buf + * Output buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p buf in bytes. + * @param[in] item + * Pattern item to copy specification from. 
+ * @param type + * Specification selector for either @p spec, @p last or @p mask. + * + * @return + * Number of bytes needed to store pattern item specification regardless + * of @p size. @p buf contents are truncated to @p size if not large + * enough. + */ static size_t -flow_item_spec_copy(void *buf, const struct rte_flow_item *item, - enum item_spec_type type) +rte_flow_conv_item_spec(void *buf, const size_t size, + const struct rte_flow_item *item, + enum rte_flow_conv_item_spec_type type) { - size_t size = 0; + size_t off; const void *data = - type == ITEM_SPEC ? item->spec : - type == ITEM_LAST ? item->last : - type == ITEM_MASK ? item->mask : + type == RTE_FLOW_CONV_ITEM_SPEC ? item->spec : + type == RTE_FLOW_CONV_ITEM_LAST ? item->last : + type == RTE_FLOW_CONV_ITEM_MASK ? item->mask : NULL; - if (!item->spec || !data) - goto empty; switch (item->type) { union { const struct rte_flow_item_raw *raw; @@ -324,7 +373,7 @@ flow_item_spec_copy(void *buf, const struct rte_flow_item *item, union { struct rte_flow_item_raw *raw; } dst; - size_t off; + size_t tmp; case RTE_FLOW_ITEM_TYPE_RAW: spec.raw = item->spec; @@ -332,91 +381,466 @@ flow_item_spec_copy(void *buf, const struct rte_flow_item *item, mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask; src.raw = data; dst.raw = buf; - off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw), - sizeof(*src.raw->pattern)); - if (type == ITEM_SPEC || - (type == ITEM_MASK && + rte_memcpy(dst.raw, + (&(struct rte_flow_item_raw){ + .relative = src.raw->relative, + .search = src.raw->search, + .reserved = src.raw->reserved, + .offset = src.raw->offset, + .limit = src.raw->limit, + .length = src.raw->length, + }), + size > sizeof(*dst.raw) ? sizeof(*dst.raw) : size); + off = sizeof(*dst.raw); + if (type == RTE_FLOW_CONV_ITEM_SPEC || + (type == RTE_FLOW_CONV_ITEM_MASK && ((spec.raw->length & mask.raw->length) >= (last.raw->length & mask.raw->length)))) - size = spec.raw->length & mask.raw->length; + tmp = spec.raw->length & mask.raw->length; else - size = last.raw->length & mask.raw->length; - size = off + size * sizeof(*src.raw->pattern); - if (dst.raw) { - memcpy(dst.raw, src.raw, sizeof(*src.raw)); - dst.raw->pattern = memcpy((uint8_t *)dst.raw + off, - src.raw->pattern, - size - off); + tmp = last.raw->length & mask.raw->length; + if (tmp) { + off = RTE_ALIGN_CEIL(off, sizeof(*dst.raw->pattern)); + if (size >= off + tmp) + dst.raw->pattern = rte_memcpy + ((void *)((uintptr_t)dst.raw + off), + src.raw->pattern, tmp); + off += tmp; } break; default: - size = rte_flow_desc_item[item->type].size; - if (buf) - memcpy(buf, data, size); + off = rte_flow_desc_item[item->type].size; + rte_memcpy(buf, data, (size > off ? off : size)); break; } -empty: - return RTE_ALIGN_CEIL(size, sizeof(double)); + return off; } -/** Compute storage space needed by action configuration and copy it. */ +/** + * Copy action configuration. + * + * @param[out] buf + * Output buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p buf in bytes. + * @param[in] action + * Action to copy configuration from. + * + * @return + * Number of bytes needed to store pattern item specification regardless + * of @p size. @p buf contents are truncated to @p size if not large + * enough. 
+ */ static size_t -flow_action_conf_copy(void *buf, const struct rte_flow_action *action) +rte_flow_conv_action_conf(void *buf, const size_t size, + const struct rte_flow_action *action) { - size_t size = 0; + size_t off; - if (!action->conf) - goto empty; switch (action->type) { union { const struct rte_flow_action_rss *rss; + const struct rte_flow_action_vxlan_encap *vxlan_encap; + const struct rte_flow_action_nvgre_encap *nvgre_encap; } src; union { struct rte_flow_action_rss *rss; + struct rte_flow_action_vxlan_encap *vxlan_encap; + struct rte_flow_action_nvgre_encap *nvgre_encap; } dst; - size_t off; + size_t tmp; + int ret; case RTE_FLOW_ACTION_TYPE_RSS: src.rss = action->conf; dst.rss = buf; - off = 0; - if (dst.rss) - *dst.rss = (struct rte_flow_action_rss){ + rte_memcpy(dst.rss, + (&(struct rte_flow_action_rss){ .func = src.rss->func, .level = src.rss->level, .types = src.rss->types, .key_len = src.rss->key_len, .queue_num = src.rss->queue_num, - }; - off += sizeof(*src.rss); + }), + size > sizeof(*dst.rss) ? sizeof(*dst.rss) : size); + off = sizeof(*dst.rss); if (src.rss->key_len) { - off = RTE_ALIGN_CEIL(off, sizeof(double)); - size = sizeof(*src.rss->key) * src.rss->key_len; - if (dst.rss) - dst.rss->key = memcpy + off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->key)); + tmp = sizeof(*src.rss->key) * src.rss->key_len; + if (size >= off + tmp) + dst.rss->key = rte_memcpy ((void *)((uintptr_t)dst.rss + off), - src.rss->key, size); - off += size; + src.rss->key, tmp); + off += tmp; } if (src.rss->queue_num) { - off = RTE_ALIGN_CEIL(off, sizeof(double)); - size = sizeof(*src.rss->queue) * src.rss->queue_num; - if (dst.rss) - dst.rss->queue = memcpy + off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->queue)); + tmp = sizeof(*src.rss->queue) * src.rss->queue_num; + if (size >= off + tmp) + dst.rss->queue = rte_memcpy ((void *)((uintptr_t)dst.rss + off), - src.rss->queue, size); - off += size; + src.rss->queue, tmp); + off += tmp; + } + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: + src.vxlan_encap = action->conf; + dst.vxlan_encap = buf; + RTE_BUILD_BUG_ON(sizeof(*src.vxlan_encap) != + sizeof(*src.nvgre_encap) || + offsetof(struct rte_flow_action_vxlan_encap, + definition) != + offsetof(struct rte_flow_action_nvgre_encap, + definition)); + off = sizeof(*dst.vxlan_encap); + if (src.vxlan_encap->definition) { + off = RTE_ALIGN_CEIL + (off, sizeof(*dst.vxlan_encap->definition)); + ret = rte_flow_conv + (RTE_FLOW_CONV_OP_PATTERN, + (void *)((uintptr_t)dst.vxlan_encap + off), + size > off ? size - off : 0, + src.vxlan_encap->definition, NULL); + if (ret < 0) + return 0; + if (size >= off + ret) + dst.vxlan_encap->definition = + (void *)((uintptr_t)dst.vxlan_encap + + off); + off += ret; } - size = off; break; default: - size = rte_flow_desc_action[action->type].size; - if (buf) - memcpy(buf, action->conf, size); + off = rte_flow_desc_action[action->type].size; + rte_memcpy(buf, action->conf, (size > off ? off : size)); break; } -empty: - return RTE_ALIGN_CEIL(size, sizeof(double)); + return off; +} + +/** + * Copy a list of pattern items. + * + * @param[out] dst + * Destination buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p dst in bytes. + * @param[in] src + * Source pattern items. + * @param num + * Maximum number of pattern items to process from @p src or 0 to process + * the entire list. In both cases, processing stops after + * RTE_FLOW_ITEM_TYPE_END is encountered. 
+ * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A positive value representing the number of bytes needed to store + * pattern items regardless of @p size on success (@p buf contents are + * truncated to @p size if not large enough), a negative errno value + * otherwise and rte_errno is set. + */ +static int +rte_flow_conv_pattern(struct rte_flow_item *dst, + const size_t size, + const struct rte_flow_item *src, + unsigned int num, + struct rte_flow_error *error) +{ + uintptr_t data = (uintptr_t)dst; + size_t off; + size_t ret; + unsigned int i; + + for (i = 0, off = 0; !num || i != num; ++i, ++src, ++dst) { + if ((size_t)src->type >= RTE_DIM(rte_flow_desc_item) || + !rte_flow_desc_item[src->type].name) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, src, + "cannot convert unknown item type"); + if (size >= off + sizeof(*dst)) + *dst = (struct rte_flow_item){ + .type = src->type, + }; + off += sizeof(*dst); + if (!src->type) + num = i + 1; + } + num = i; + src -= num; + dst -= num; + do { + if (src->spec) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_item_spec + ((void *)(data + off), + size > off ? size - off : 0, src, + RTE_FLOW_CONV_ITEM_SPEC); + if (size && size >= off + ret) + dst->spec = (void *)(data + off); + off += ret; + + } + if (src->last) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_item_spec + ((void *)(data + off), + size > off ? size - off : 0, src, + RTE_FLOW_CONV_ITEM_LAST); + if (size && size >= off + ret) + dst->last = (void *)(data + off); + off += ret; + } + if (src->mask) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_item_spec + ((void *)(data + off), + size > off ? size - off : 0, src, + RTE_FLOW_CONV_ITEM_MASK); + if (size && size >= off + ret) + dst->mask = (void *)(data + off); + off += ret; + } + ++src; + ++dst; + } while (--num); + return off; +} + +/** + * Copy a list of actions. + * + * @param[out] dst + * Destination buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p dst in bytes. + * @param[in] src + * Source actions. + * @param num + * Maximum number of actions to process from @p src or 0 to process the + * entire list. In both cases, processing stops after + * RTE_FLOW_ACTION_TYPE_END is encountered. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A positive value representing the number of bytes needed to store + * actions regardless of @p size on success (@p buf contents are truncated + * to @p size if not large enough), a negative errno value otherwise and + * rte_errno is set. + */ +static int +rte_flow_conv_actions(struct rte_flow_action *dst, + const size_t size, + const struct rte_flow_action *src, + unsigned int num, + struct rte_flow_error *error) +{ + uintptr_t data = (uintptr_t)dst; + size_t off; + size_t ret; + unsigned int i; + + for (i = 0, off = 0; !num || i != num; ++i, ++src, ++dst) { + if ((size_t)src->type >= RTE_DIM(rte_flow_desc_action) || + !rte_flow_desc_action[src->type].name) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + src, "cannot convert unknown action type"); + if (size >= off + sizeof(*dst)) + *dst = (struct rte_flow_action){ + .type = src->type, + }; + off += sizeof(*dst); + if (!src->type) + num = i + 1; + } + num = i; + src -= num; + dst -= num; + do { + if (src->conf) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_action_conf + ((void *)(data + off), + size > off ? 
size - off : 0, src); + if (size && size >= off + ret) + dst->conf = (void *)(data + off); + off += ret; + } + ++src; + ++dst; + } while (--num); + return off; +} + +/** + * Copy flow rule components. + * + * This comprises the flow rule descriptor itself, attributes, pattern and + * actions list. NULL components in @p src are skipped. + * + * @param[out] dst + * Destination buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p dst in bytes. + * @param[in] src + * Source flow rule descriptor. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A positive value representing the number of bytes needed to store all + * components including the descriptor regardless of @p size on success + * (@p buf contents are truncated to @p size if not large enough), a + * negative errno value otherwise and rte_errno is set. + */ +static int +rte_flow_conv_rule(struct rte_flow_conv_rule *dst, + const size_t size, + const struct rte_flow_conv_rule *src, + struct rte_flow_error *error) +{ + size_t off; + int ret; + + rte_memcpy(dst, + (&(struct rte_flow_conv_rule){ + .attr = NULL, + .pattern = NULL, + .actions = NULL, + }), + size > sizeof(*dst) ? sizeof(*dst) : size); + off = sizeof(*dst); + if (src->attr_ro) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + if (size && size >= off + sizeof(*dst->attr)) + dst->attr = rte_memcpy + ((void *)((uintptr_t)dst + off), + src->attr_ro, sizeof(*dst->attr)); + off += sizeof(*dst->attr); + } + if (src->pattern_ro) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_pattern((void *)((uintptr_t)dst + off), + size > off ? size - off : 0, + src->pattern_ro, 0, error); + if (ret < 0) + return ret; + if (size && size >= off + (size_t)ret) + dst->pattern = (void *)((uintptr_t)dst + off); + off += ret; + } + if (src->actions_ro) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + ret = rte_flow_conv_actions((void *)((uintptr_t)dst + off), + size > off ? size - off : 0, + src->actions_ro, 0, error); + if (ret < 0) + return ret; + if (size >= off + (size_t)ret) + dst->actions = (void *)((uintptr_t)dst + off); + off += ret; + } + return off; +} + +/** + * Retrieve the name of a pattern item/action type. + * + * @param is_action + * Nonzero when @p src represents an action type instead of a pattern item + * type. + * @param is_ptr + * Nonzero to write string address instead of contents into @p dst. + * @param[out] dst + * Destination buffer. Can be NULL if @p size is zero. + * @param size + * Size of @p dst in bytes. + * @param[in] src + * Depending on @p is_action, source pattern item or action type cast as a + * pointer. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A positive value representing the number of bytes needed to store the + * name or its address regardless of @p size on success (@p buf contents + * are truncated to @p size if not large enough), a negative errno value + * otherwise and rte_errno is set. 
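/*
 * Illustrative sketch (not part of this patch): the calling convention shared
 * by these helpers and exposed through rte_flow_conv() below is to query the
 * required size first, then convert into a buffer at least that large.
 * attr/pattern/actions stand for an application's existing rule description.
 */
#include <stdlib.h>
#include <rte_flow.h>

static struct rte_flow_conv_rule *
duplicate_rule(const struct rte_flow_attr *attr,
	       const struct rte_flow_item *pattern,
	       const struct rte_flow_action *actions,
	       struct rte_flow_error *error)
{
	const struct rte_flow_conv_rule src = {
		.attr_ro = attr,
		.pattern_ro = pattern,
		.actions_ro = actions,
	};
	struct rte_flow_conv_rule *dst;
	int len;

	/* First pass: NULL/0 destination, only the needed size is returned. */
	len = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &src, error);
	if (len < 0)
		return NULL;
	dst = malloc(len);
	if (dst == NULL)
		return NULL;
	/* Second pass: same call again with a large enough buffer. */
	if (rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, len, &src, error) < 0) {
		free(dst);
		return NULL;
	}
	return dst;
}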
+ */ +static int +rte_flow_conv_name(int is_action, + int is_ptr, + char *dst, + const size_t size, + const void *src, + struct rte_flow_error *error) +{ + struct desc_info { + const struct rte_flow_desc_data *data; + size_t num; + }; + static const struct desc_info info_rep[2] = { + { rte_flow_desc_item, RTE_DIM(rte_flow_desc_item), }, + { rte_flow_desc_action, RTE_DIM(rte_flow_desc_action), }, + }; + const struct desc_info *const info = &info_rep[!!is_action]; + unsigned int type = (uintptr_t)src; + + if (type >= info->num) + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unknown object type to retrieve the name of"); + if (!is_ptr) + return strlcpy(dst, info->data[type].name, size); + if (size >= sizeof(const char **)) + *((const char **)dst) = info->data[type].name; + return sizeof(const char **); +} + +/** Helper function to convert flow API objects. */ +int +rte_flow_conv(enum rte_flow_conv_op op, + void *dst, + size_t size, + const void *src, + struct rte_flow_error *error) +{ + switch (op) { + const struct rte_flow_attr *attr; + + case RTE_FLOW_CONV_OP_NONE: + return 0; + case RTE_FLOW_CONV_OP_ATTR: + attr = src; + if (size > sizeof(*attr)) + size = sizeof(*attr); + rte_memcpy(dst, attr, size); + return sizeof(*attr); + case RTE_FLOW_CONV_OP_ITEM: + return rte_flow_conv_pattern(dst, size, src, 1, error); + case RTE_FLOW_CONV_OP_ACTION: + return rte_flow_conv_actions(dst, size, src, 1, error); + case RTE_FLOW_CONV_OP_PATTERN: + return rte_flow_conv_pattern(dst, size, src, 0, error); + case RTE_FLOW_CONV_OP_ACTIONS: + return rte_flow_conv_actions(dst, size, src, 0, error); + case RTE_FLOW_CONV_OP_RULE: + return rte_flow_conv_rule(dst, size, src, error); + case RTE_FLOW_CONV_OP_ITEM_NAME: + return rte_flow_conv_name(0, 0, dst, size, src, error); + case RTE_FLOW_CONV_OP_ACTION_NAME: + return rte_flow_conv_name(1, 0, dst, size, src, error); + case RTE_FLOW_CONV_OP_ITEM_NAME_PTR: + return rte_flow_conv_name(0, 1, dst, size, src, error); + case RTE_FLOW_CONV_OP_ACTION_NAME_PTR: + return rte_flow_conv_name(1, 1, dst, size, src, error); + } + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "unknown object conversion operation"); } /** Store a full rte_flow description. */ @@ -426,105 +850,49 @@ rte_flow_copy(struct rte_flow_desc *desc, size_t len, const struct rte_flow_item *items, const struct rte_flow_action *actions) { - struct rte_flow_desc *fd = NULL; - size_t tmp; - size_t off1 = 0; - size_t off2 = 0; - size_t size = 0; - -store: - if (items) { - const struct rte_flow_item *item; - - item = items; - if (fd) - fd->items = (void *)&fd->data[off1]; - do { - struct rte_flow_item *dst = NULL; - - if ((size_t)item->type >= - RTE_DIM(rte_flow_desc_item) || - !rte_flow_desc_item[item->type].name) { - rte_errno = ENOTSUP; - return 0; - } - if (fd) - dst = memcpy(fd->data + off1, item, - sizeof(*item)); - off1 += sizeof(*item); - if (item->spec) { - if (fd) - dst->spec = fd->data + off2; - off2 += flow_item_spec_copy - (fd ? fd->data + off2 : NULL, item, - ITEM_SPEC); - } - if (item->last) { - if (fd) - dst->last = fd->data + off2; - off2 += flow_item_spec_copy - (fd ? fd->data + off2 : NULL, item, - ITEM_LAST); - } - if (item->mask) { - if (fd) - dst->mask = fd->data + off2; - off2 += flow_item_spec_copy - (fd ? 
fd->data + off2 : NULL, item, - ITEM_MASK); - } - off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); - } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END); - off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); - } - if (actions) { - const struct rte_flow_action *action; - - action = actions; - if (fd) - fd->actions = (void *)&fd->data[off1]; - do { - struct rte_flow_action *dst = NULL; - - if ((size_t)action->type >= - RTE_DIM(rte_flow_desc_action) || - !rte_flow_desc_action[action->type].name) { - rte_errno = ENOTSUP; - return 0; - } - if (fd) - dst = memcpy(fd->data + off1, action, - sizeof(*action)); - off1 += sizeof(*action); - if (action->conf) { - if (fd) - dst->conf = fd->data + off2; - off2 += flow_action_conf_copy - (fd ? fd->data + off2 : NULL, action); - } - off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); - } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END); + /* + * Overlap struct rte_flow_conv with struct rte_flow_desc in order + * to convert the former to the latter without wasting space. + */ + struct rte_flow_conv_rule *dst = + len ? + (void *)((uintptr_t)desc + + (offsetof(struct rte_flow_desc, actions) - + offsetof(struct rte_flow_conv_rule, actions))) : + NULL; + size_t dst_size = + len > sizeof(*desc) - sizeof(*dst) ? + len - (sizeof(*desc) - sizeof(*dst)) : + 0; + struct rte_flow_conv_rule src = { + .attr_ro = NULL, + .pattern_ro = items, + .actions_ro = actions, + }; + int ret; + + RTE_BUILD_BUG_ON(sizeof(struct rte_flow_desc) < + sizeof(struct rte_flow_conv_rule)); + if (dst_size && + (&dst->pattern != &desc->items || + &dst->actions != &desc->actions || + (uintptr_t)(dst + 1) != (uintptr_t)(desc + 1))) { + rte_errno = EINVAL; + return 0; } - if (fd != NULL) - return size; - off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); - tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data), - sizeof(double)); - size = tmp + off1 + off2; - if (size > len) - return size; - fd = desc; - if (fd != NULL) { - *fd = (const struct rte_flow_desc) { - .size = size, + ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, dst_size, &src, NULL); + if (ret < 0) + return 0; + ret += sizeof(*desc) - sizeof(*dst); + rte_memcpy(desc, + (&(struct rte_flow_desc){ + .size = ret, .attr = *attr, - }; - tmp -= offsetof(struct rte_flow_desc, data); - off2 = tmp + off1; - off1 = tmp; - goto store; - } - return 0; + .items = dst_size ? dst->pattern : NULL, + .actions = dst_size ? dst->actions : NULL, + }), + len > sizeof(*desc) ? sizeof(*desc) : len); + return ret; } /** diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h index f8ba71cd..c0fe8792 100644 --- a/lib/librte_ethdev/rte_flow.h +++ b/lib/librte_ethdev/rte_flow.h @@ -18,6 +18,7 @@ #include <stdint.h> #include <rte_arp.h> +#include <rte_common.h> #include <rte_ether.h> #include <rte_eth_ctrl.h> #include <rte_icmp.h> @@ -413,6 +414,14 @@ enum rte_flow_item_type { * See struct rte_flow_item_mark. */ RTE_FLOW_ITEM_TYPE_MARK, + + /** + * [META] + * + * Matches a metadata value specified in mbuf metadata field. + * See struct rte_flow_item_meta. + */ + RTE_FLOW_ITEM_TYPE_META, }; /** @@ -1156,6 +1165,22 @@ rte_flow_item_icmp6_nd_opt_tla_eth_mask = { #endif /** + * RTE_FLOW_ITEM_TYPE_META. + * + * Matches a specified metadata value. + */ +struct rte_flow_item_meta { + rte_be32_t data; +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_META. 
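For illustration, a minimal sketch of how an application might match on the new META item (assuming the PMD supports it; the metadata value is arbitrary and rte_flow_item_meta_mask is the default mask defined just below):

	/* Match packets carrying application metadata 0x1234 (illustrative). */
	struct rte_flow_item_meta meta_spec = { .data = RTE_BE32(0x1234) };
	struct rte_flow_item pattern[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_META,
			.spec = &meta_spec,
			.mask = &rte_flow_item_meta_mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};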
*/ +#ifndef __cplusplus +static const struct rte_flow_item_meta rte_flow_item_meta_mask = { + .data = RTE_BE32(UINT32_MAX), +}; +#endif + +/** * @warning * @b EXPERIMENTAL: this structure may change without prior notice * @@ -1505,6 +1530,127 @@ enum rte_flow_action_type { * error. */ RTE_FLOW_ACTION_TYPE_NVGRE_DECAP, + + /** + * Add outer header whose template is provided in its data buffer + * + * See struct rte_flow_action_raw_encap. + */ + RTE_FLOW_ACTION_TYPE_RAW_ENCAP, + + /** + * Remove outer header whose template is provided in its data buffer. + * + * See struct rte_flow_action_raw_decap + */ + RTE_FLOW_ACTION_TYPE_RAW_DECAP, + + /** + * Modify IPv4 source address in the outermost IPv4 header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV4, + * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_ipv4. + */ + RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC, + + /** + * Modify IPv4 destination address in the outermost IPv4 header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV4, + * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_ipv4. + */ + RTE_FLOW_ACTION_TYPE_SET_IPV4_DST, + + /** + * Modify IPv6 source address in the outermost IPv6 header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV6, + * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_ipv6. + */ + RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC, + + /** + * Modify IPv6 destination address in the outermost IPv6 header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV6, + * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_ipv6. + */ + RTE_FLOW_ACTION_TYPE_SET_IPV6_DST, + + /** + * Modify source port number in the outermost TCP/UDP header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_TCP + * or RTE_FLOW_ITEM_TYPE_UDP, then the PMD should return a + * RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_tp. + */ + RTE_FLOW_ACTION_TYPE_SET_TP_SRC, + + /** + * Modify destination port number in the outermost TCP/UDP header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_TCP + * or RTE_FLOW_ITEM_TYPE_UDP, then the PMD should return a + * RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_tp. + */ + RTE_FLOW_ACTION_TYPE_SET_TP_DST, + + /** + * Swap the source and destination MAC addresses in the outermost + * Ethernet header. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH, + * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_MAC_SWAP, + + /** + * Decrease TTL value directly + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DEC_TTL, + + /** + * Set TTL value + * + * See struct rte_flow_action_set_ttl + */ + RTE_FLOW_ACTION_TYPE_SET_TTL, + + /** + * Set source MAC address from matched flow. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH, + * the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_mac. + */ + RTE_FLOW_ACTION_TYPE_SET_MAC_SRC, + + /** + * Set destination MAC address from matched flow. + * + * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH, + * the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error. + * + * See struct rte_flow_action_set_mac. 
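As a sketch of how the new header-rewrite actions combine in an action list (illustrative only; it assumes PMD support and a pattern with a valid IPv4 item so the SET_IPV4_SRC/SET_TTL constraints above are met):

	/* Rewrite the outermost IPv4 source address and TTL, then queue. */
	struct rte_flow_action_set_ipv4 set_src = {
		.ipv4_addr = RTE_BE32(0x0a000001),	/* 10.0.0.1 */
	};
	struct rte_flow_action_set_ttl set_ttl = { .ttl_value = 64 };
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC, .conf = &set_src },
		{ .type = RTE_FLOW_ACTION_TYPE_SET_TTL, .conf = &set_ttl },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};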
+ */ + RTE_FLOW_ACTION_TYPE_SET_MAC_DST, }; /** @@ -1868,6 +2014,114 @@ struct rte_flow_action_nvgre_encap { struct rte_flow_item *definition; }; +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_RAW_ENCAP + * + * Raw tunnel end-point encapsulation data definition. + * + * The data holds the headers definitions to be applied on the packet. + * The data must start with ETH header up to the tunnel item header itself. + * When used right after RAW_DECAP (for decapsulating L3 tunnel type for + * example MPLSoGRE) the data will just hold layer 2 header. + * + * The preserve parameter holds which bits in the packet the PMD is not allowed + * to change, this parameter can also be NULL and then the PMD is allowed + * to update any field. + * + * size holds the number of bytes in @p data and @p preserve. + */ +struct rte_flow_action_raw_encap { + uint8_t *data; /**< Encapsulation data. */ + uint8_t *preserve; /**< Bit-mask of @p data to preserve on output. */ + size_t size; /**< Size of @p data and @p preserve. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_RAW_DECAP + * + * Raw tunnel end-point decapsulation data definition. + * + * The data holds the headers definitions to be removed from the packet. + * The data must start with ETH header up to the tunnel item header itself. + * When used right before RAW_DECAP (for encapsulating L3 tunnel type for + * example MPLSoGRE) the data will just hold layer 2 header. + * + * size holds the number of bytes in @p data. + */ +struct rte_flow_action_raw_decap { + uint8_t *data; /**< Encapsulation data. */ + size_t size; /**< Size of @p data and @p preserve. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC + * RTE_FLOW_ACTION_TYPE_SET_IPV4_DST + * + * Allows modification of IPv4 source (RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC) + * and destination address (RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) in the + * specified outermost IPv4 header. + */ +struct rte_flow_action_set_ipv4 { + rte_be32_t ipv4_addr; +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC + * RTE_FLOW_ACTION_TYPE_SET_IPV6_DST + * + * Allows modification of IPv6 source (RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC) + * and destination address (RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) in the + * specified outermost IPv6 header. + */ +struct rte_flow_action_set_ipv6 { + uint8_t ipv6_addr[16]; +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_SET_TP_SRC + * RTE_FLOW_ACTION_TYPE_SET_TP_DST + * + * Allows modification of source (RTE_FLOW_ACTION_TYPE_SET_TP_SRC) + * and destination (RTE_FLOW_ACTION_TYPE_SET_TP_DST) port numbers + * in the specified outermost TCP/UDP header. + */ +struct rte_flow_action_set_tp { + rte_be16_t port; +}; + +/** + * RTE_FLOW_ACTION_TYPE_SET_TTL + * + * Set the TTL value directly for IPv4 or IPv6 + */ +struct rte_flow_action_set_ttl { + uint8_t ttl_value; +}; + +/** + * RTE_FLOW_ACTION_TYPE_SET_MAC + * + * Set MAC address from the matched flow + */ +struct rte_flow_action_set_mac { + uint8_t mac_addr[ETHER_ADDR_LEN]; +}; + /* * Definition of a single action. * @@ -1932,6 +2186,175 @@ struct rte_flow_error { }; /** + * Complete flow rule description. + * + * This object type is used when converting a flow rule description. 
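A short sketch of filling struct rte_flow_action_raw_encap (illustrative; encap_hdr and encap_len are assumed to be a caller-built header template starting with the Ethernet header, as the description above requires):

	/* Encapsulate using a prebuilt header template. */
	struct rte_flow_action_raw_encap raw_encap = {
		.data = encap_hdr,
		.preserve = NULL,	/* NULL: PMD may update any field */
		.size = encap_len,
	};
	struct rte_flow_action encap_action = {
		.type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
		.conf = &raw_encap,
	};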
+ * + * @see RTE_FLOW_CONV_OP_RULE + * @see rte_flow_conv() + */ +RTE_STD_C11 +struct rte_flow_conv_rule { + union { + const struct rte_flow_attr *attr_ro; /**< RO attributes. */ + struct rte_flow_attr *attr; /**< Attributes. */ + }; + union { + const struct rte_flow_item *pattern_ro; /**< RO pattern. */ + struct rte_flow_item *pattern; /**< Pattern items. */ + }; + union { + const struct rte_flow_action *actions_ro; /**< RO actions. */ + struct rte_flow_action *actions; /**< List of actions. */ + }; +}; + +/** + * Conversion operations for flow API objects. + * + * @see rte_flow_conv() + */ +enum rte_flow_conv_op { + /** + * No operation to perform. + * + * rte_flow_conv() simply returns 0. + */ + RTE_FLOW_CONV_OP_NONE, + + /** + * Convert attributes structure. + * + * This is a basic copy of an attributes structure. + * + * - @p src type: + * @code const struct rte_flow_attr * @endcode + * - @p dst type: + * @code struct rte_flow_attr * @endcode + */ + RTE_FLOW_CONV_OP_ATTR, + + /** + * Convert a single item. + * + * Duplicates @p spec, @p last and @p mask but not outside objects. + * + * - @p src type: + * @code const struct rte_flow_item * @endcode + * - @p dst type: + * @code struct rte_flow_item * @endcode + */ + RTE_FLOW_CONV_OP_ITEM, + + /** + * Convert a single action. + * + * Duplicates @p conf but not outside objects. + * + * - @p src type: + * @code const struct rte_flow_action * @endcode + * - @p dst type: + * @code struct rte_flow_action * @endcode + */ + RTE_FLOW_CONV_OP_ACTION, + + /** + * Convert an entire pattern. + * + * Duplicates all pattern items at once with the same constraints as + * RTE_FLOW_CONV_OP_ITEM. + * + * - @p src type: + * @code const struct rte_flow_item * @endcode + * - @p dst type: + * @code struct rte_flow_item * @endcode + */ + RTE_FLOW_CONV_OP_PATTERN, + + /** + * Convert a list of actions. + * + * Duplicates the entire list of actions at once with the same + * constraints as RTE_FLOW_CONV_OP_ACTION. + * + * - @p src type: + * @code const struct rte_flow_action * @endcode + * - @p dst type: + * @code struct rte_flow_action * @endcode + */ + RTE_FLOW_CONV_OP_ACTIONS, + + /** + * Convert a complete flow rule description. + * + * Comprises attributes, pattern and actions together at once with + * the usual constraints. + * + * - @p src type: + * @code const struct rte_flow_conv_rule * @endcode + * - @p dst type: + * @code struct rte_flow_conv_rule * @endcode + */ + RTE_FLOW_CONV_OP_RULE, + + /** + * Convert item type to its name string. + * + * Writes a NUL-terminated string to @p dst. Like snprintf(), the + * returned value excludes the terminator which is always written + * nonetheless. + * + * - @p src type: + * @code (const void *)enum rte_flow_item_type @endcode + * - @p dst type: + * @code char * @endcode + **/ + RTE_FLOW_CONV_OP_ITEM_NAME, + + /** + * Convert action type to its name string. + * + * Writes a NUL-terminated string to @p dst. Like snprintf(), the + * returned value excludes the terminator which is always written + * nonetheless. + * + * - @p src type: + * @code (const void *)enum rte_flow_action_type @endcode + * - @p dst type: + * @code char * @endcode + **/ + RTE_FLOW_CONV_OP_ACTION_NAME, + + /** + * Convert item type to pointer to item name. + * + * Retrieves item name pointer from its type. The string itself is + * not copied; instead, a unique pointer to an internal static + * constant storage is written to @p dst. 
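To make the name operations concrete, a small sketch (illustrative only; RTE_FLOW_ITEM_TYPE_VXLAN is just an example type and error handling is omitted):

	char name[64];
	const char *name_ptr;
	struct rte_flow_error err;

	/* OP_ITEM_NAME copies the string, with an snprintf-like return. */
	rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME, name, sizeof(name),
		      (void *)(uintptr_t)RTE_FLOW_ITEM_TYPE_VXLAN, &err);
	/* OP_ITEM_NAME_PTR stores a pointer to internal constant storage. */
	rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &name_ptr,
		      sizeof(name_ptr),
		      (void *)(uintptr_t)RTE_FLOW_ITEM_TYPE_VXLAN, &err);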
+ * + * - @p src type: + * @code (const void *)enum rte_flow_item_type @endcode + * - @p dst type: + * @code const char ** @endcode + */ + RTE_FLOW_CONV_OP_ITEM_NAME_PTR, + + /** + * Convert action type to pointer to action name. + * + * Retrieves action name pointer from its type. The string itself is + * not copied; instead, a unique pointer to an internal static + * constant storage is written to @p dst. + * + * - @p src type: + * @code (const void *)enum rte_flow_action_type @endcode + * - @p dst type: + * @code const char ** @endcode + */ + RTE_FLOW_CONV_OP_ACTION_NAME_PTR, +}; + +/** * Check whether a flow rule can be created on a given port. * * The flow rule is validated for correctness and whether it could be accepted @@ -2162,10 +2585,8 @@ rte_flow_error_set(struct rte_flow_error *error, const char *message); /** - * Generic flow representation. - * - * This form is sufficient to describe an rte_flow independently from any - * PMD implementation and allows for replayability and identification. + * @deprecated + * @see rte_flow_copy() */ struct rte_flow_desc { size_t size; /**< Allocated space including data[]. */ @@ -2176,8 +2597,14 @@ struct rte_flow_desc { }; /** + * @deprecated * Copy an rte_flow rule description. * + * This interface is kept for compatibility with older applications but is + * implemented as a wrapper to rte_flow_conv(). It is deprecated due to its + * lack of flexibility and reliance on a type unusable with C++ programs + * (struct rte_flow_desc). + * * @param[in] fd * Flow rule description. * @param[in] len @@ -2195,12 +2622,61 @@ struct rte_flow_desc { * If len is lower than the size of the flow, the number of bytes that would * have been written to desc had it been sufficient. Nothing is written. */ +__rte_deprecated size_t rte_flow_copy(struct rte_flow_desc *fd, size_t len, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions); +/** + * Flow object conversion helper. + * + * This function performs conversion of various flow API objects to a + * pre-allocated destination buffer. See enum rte_flow_conv_op for possible + * operations and details about each of them. + * + * Since destination buffer must be large enough, it works in a manner + * reminiscent of snprintf(): + * + * - If @p size is 0, @p dst may be a NULL pointer, otherwise @p dst must be + * non-NULL. + * - If positive, the returned value represents the number of bytes needed + * to store the conversion of @p src to @p dst according to @p op + * regardless of the @p size parameter. + * - Since no more than @p size bytes can be written to @p dst, output is + * truncated and may be inconsistent when the returned value is larger + * than that. + * - In case of conversion error, a negative error code is returned and + * @p dst contents are unspecified. + * + * @param op + * Operation to perform, related to the object type of @p dst. + * @param[out] dst + * Destination buffer address. Must be suitably aligned by the caller. + * @param size + * Destination buffer size in bytes. + * @param[in] src + * Source object to copy. Depending on @p op, its type may differ from + * that of @p dst. + * @param[out] error + * Perform verbose error reporting if not NULL. Initialized in case of + * error only. + * + * @return + * The number of bytes required to convert @p src to @p dst on success, a + * negative errno value otherwise and rte_errno is set. 
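The snprintf-like convention described above leads to the usual two-pass pattern; a sketch (illustrative only; attr, pattern and actions are assumed to describe an existing rule, and malloc() stands in for whatever allocator the application uses):

	struct rte_flow_conv_rule rule = {
		.attr_ro = attr,
		.pattern_ro = pattern,
		.actions_ro = actions,
	};
	struct rte_flow_error err;
	struct rte_flow_conv_rule *copy = NULL;
	int len;

	/* First pass: query the required size. */
	len = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &err);
	if (len > 0) {
		copy = malloc(len);
		/* Second pass: perform the conversion into the buffer. */
		if (copy != NULL &&
		    rte_flow_conv(RTE_FLOW_CONV_OP_RULE, copy, len,
				  &rule, &err) < 0) {
			free(copy);
			copy = NULL;
		}
	}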
+ * + * @see rte_flow_conv_op + */ +__rte_experimental +int +rte_flow_conv(enum rte_flow_conv_op op, + void *dst, + size_t size, + const void *src, + struct rte_flow_error *error); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h index 955f02ff..646ef388 100644 --- a/lib/librte_ethdev/rte_tm.h +++ b/lib/librte_ethdev/rte_tm.h @@ -831,10 +831,10 @@ enum rte_tm_cman_mode { */ struct rte_tm_red_params { /** Minimum queue threshold */ - uint32_t min_th; + uint64_t min_th; /** Maximum queue threshold */ - uint32_t max_th; + uint64_t max_th; /** Inverse of packet marking probability maximum value (maxp), i.e. * maxp_inv = 1 / maxp diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index 47f599a6..94961870 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_eventdev.a # library version -LIBABIVER := 5 +LIBABIVER := 6 # build flags CFLAGS += -DALLOW_EXPERIMENTAL_API @@ -28,6 +28,7 @@ SRCS-y += rte_event_ring.c SRCS-y += rte_event_eth_rx_adapter.c SRCS-y += rte_event_timer_adapter.c SRCS-y += rte_event_crypto_adapter.c +SRCS-y += rte_event_eth_tx_adapter.c # export include files SYMLINK-y-include += rte_eventdev.h @@ -39,6 +40,7 @@ SYMLINK-y-include += rte_event_eth_rx_adapter.h SYMLINK-y-include += rte_event_timer_adapter.h SYMLINK-y-include += rte_event_timer_adapter_pmd.h SYMLINK-y-include += rte_event_crypto_adapter.h +SYMLINK-y-include += rte_event_eth_tx_adapter.h # versioning export map EXPORT_MAP := rte_eventdev_version.map diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build index 3cbaf298..6becfe86 100644 --- a/lib/librte_eventdev/meson.build +++ b/lib/librte_eventdev/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 5 +version = 6 allow_experimental_apis = true if host_machine.system() == 'linux' @@ -14,7 +14,8 @@ sources = files('rte_eventdev.c', 'rte_event_ring.c', 'rte_event_eth_rx_adapter.c', 'rte_event_timer_adapter.c', - 'rte_event_crypto_adapter.c') + 'rte_event_crypto_adapter.c', + 'rte_event_eth_tx_adapter.c') headers = files('rte_eventdev.h', 'rte_eventdev_pmd.h', 'rte_eventdev_pmd_pci.h', @@ -23,5 +24,6 @@ headers = files('rte_eventdev.h', 'rte_event_eth_rx_adapter.h', 'rte_event_timer_adapter.h', 'rte_event_timer_adapter_pmd.h', - 'rte_event_crypto_adapter.h') + 'rte_event_crypto_adapter.h', + 'rte_event_eth_tx_adapter.h') deps += ['ring', 'ethdev', 'hash', 'mempool', 'mbuf', 'timer', 'cryptodev'] diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c index f5e5a0b5..71d008cd 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c @@ -1125,7 +1125,6 @@ rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter) wrr_pos = rx_adapter->wrr_pos; max_nb_rx = rx_adapter->max_nb_rx; buf = &rx_adapter->event_enqueue_buffer; - stats = &rx_adapter->stats; /* Iterate through a WRR sequence */ for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) { @@ -1998,8 +1997,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, rx_adapter->id = id; strcpy(rx_adapter->mem_name, mem_name); rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name, - /* FIXME: incompatible with hotplug */ - rte_eth_dev_count_total() * + RTE_MAX_ETHPORTS * sizeof(struct eth_device_info), 0, socket_id); rte_convert_rss_key((const 
uint32_t *)default_rss_key, @@ -2012,7 +2010,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id, return -ENOMEM; } rte_spinlock_init(&rx_adapter->rx_lock); - RTE_ETH_FOREACH_DEV(i) + for (i = 0; i < RTE_MAX_ETHPORTS; i++) rx_adapter->eth_devices[i].dev = &rte_eth_devices[i]; event_eth_rx_adapter[id] = rx_adapter; diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h index 332ee216..863b72a1 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h @@ -76,10 +76,6 @@ * rte_event_eth_rx_adapter_cb_register() function allows the * application to register a callback that selects which packets to enqueue * to the event device. - * - * Note: - * 1) Devices created after an instance of rte_event_eth_rx_adapter_create - * should be added to a new instance of the rx adapter. */ #ifdef __cplusplus diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/lib/librte_eventdev/rte_event_eth_tx_adapter.c new file mode 100644 index 00000000..3a21defb --- /dev/null +++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.c @@ -0,0 +1,1138 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. + */ +#include <rte_spinlock.h> +#include <rte_service_component.h> +#include <rte_ethdev.h> + +#include "rte_eventdev_pmd.h" +#include "rte_event_eth_tx_adapter.h" + +#define TXA_BATCH_SIZE 32 +#define TXA_SERVICE_NAME_LEN 32 +#define TXA_MEM_NAME_LEN 32 +#define TXA_FLUSH_THRESHOLD 1024 +#define TXA_RETRY_CNT 100 +#define TXA_MAX_NB_TX 128 +#define TXA_INVALID_DEV_ID INT32_C(-1) +#define TXA_INVALID_SERVICE_ID INT64_C(-1) + +#define txa_evdev(id) (&rte_eventdevs[txa_dev_id_array[(id)]]) + +#define txa_dev_caps_get(id) txa_evdev((id))->dev_ops->eth_tx_adapter_caps_get + +#define txa_dev_adapter_create(t) txa_evdev(t)->dev_ops->eth_tx_adapter_create + +#define txa_dev_adapter_create_ext(t) \ + txa_evdev(t)->dev_ops->eth_tx_adapter_create + +#define txa_dev_adapter_free(t) txa_evdev(t)->dev_ops->eth_tx_adapter_free + +#define txa_dev_queue_add(id) txa_evdev(id)->dev_ops->eth_tx_adapter_queue_add + +#define txa_dev_queue_del(t) txa_evdev(t)->dev_ops->eth_tx_adapter_queue_del + +#define txa_dev_start(t) txa_evdev(t)->dev_ops->eth_tx_adapter_start + +#define txa_dev_stop(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stop + +#define txa_dev_stats_reset(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stats_reset + +#define txa_dev_stats_get(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stats_get + +#define RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) \ +do { \ + if (!txa_valid_id(id)) { \ + RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d", id); \ + return retval; \ + } \ +} while (0) + +#define TXA_CHECK_OR_ERR_RET(id) \ +do {\ + int ret; \ + RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET((id), -EINVAL); \ + ret = txa_init(); \ + if (ret != 0) \ + return ret; \ + if (!txa_adapter_exist((id))) \ + return -EINVAL; \ +} while (0) + +/* Tx retry callback structure */ +struct txa_retry { + /* Ethernet port id */ + uint16_t port_id; + /* Tx queue */ + uint16_t tx_queue; + /* Adapter ID */ + uint8_t id; +}; + +/* Per queue structure */ +struct txa_service_queue_info { + /* Queue has been added */ + uint8_t added; + /* Retry callback argument */ + struct txa_retry txa_retry; + /* Tx buffer */ + struct rte_eth_dev_tx_buffer *tx_buf; +}; + +/* PMD private structure */ +struct txa_service_data { + /* Max mbufs processed in any service function invocation */ + uint32_t max_nb_tx; + /* 
Number of Tx queues in adapter */ + uint32_t nb_queues; + /* Synchronization with data path */ + rte_spinlock_t tx_lock; + /* Event port ID */ + uint8_t port_id; + /* Event device identifier */ + uint8_t eventdev_id; + /* Highest port id supported + 1 */ + uint16_t dev_count; + /* Loop count to flush Tx buffers */ + int loop_cnt; + /* Per ethernet device structure */ + struct txa_service_ethdev *txa_ethdev; + /* Statistics */ + struct rte_event_eth_tx_adapter_stats stats; + /* Adapter Identifier */ + uint8_t id; + /* Conf arg must be freed */ + uint8_t conf_free; + /* Configuration callback */ + rte_event_eth_tx_adapter_conf_cb conf_cb; + /* Configuration callback argument */ + void *conf_arg; + /* socket id */ + int socket_id; + /* Per adapter EAL service */ + int64_t service_id; + /* Memory allocation name */ + char mem_name[TXA_MEM_NAME_LEN]; +} __rte_cache_aligned; + +/* Per eth device structure */ +struct txa_service_ethdev { + /* Pointer to ethernet device */ + struct rte_eth_dev *dev; + /* Number of queues added */ + uint16_t nb_queues; + /* PMD specific queue data */ + void *queues; +}; + +/* Array of adapter instances, initialized with event device id + * when adapter is created + */ +static int *txa_dev_id_array; + +/* Array of pointers to service implementation data */ +static struct txa_service_data **txa_service_data_array; + +static int32_t txa_service_func(void *args); +static int txa_service_adapter_create_ext(uint8_t id, + struct rte_eventdev *dev, + rte_event_eth_tx_adapter_conf_cb conf_cb, + void *conf_arg); +static int txa_service_queue_del(uint8_t id, + const struct rte_eth_dev *dev, + int32_t tx_queue_id); + +static int +txa_adapter_exist(uint8_t id) +{ + return txa_dev_id_array[id] != TXA_INVALID_DEV_ID; +} + +static inline int +txa_valid_id(uint8_t id) +{ + return id < RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE; +} + +static void * +txa_memzone_array_get(const char *name, unsigned int elt_size, int nb_elems) +{ + const struct rte_memzone *mz; + unsigned int sz; + + sz = elt_size * nb_elems; + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + + mz = rte_memzone_lookup(name); + if (mz == NULL) { + mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0, + RTE_CACHE_LINE_SIZE); + if (mz == NULL) { + RTE_EDEV_LOG_ERR("failed to reserve memzone" + " name = %s err = %" + PRId32, name, rte_errno); + return NULL; + } + } + + return mz->addr; +} + +static int +txa_dev_id_array_init(void) +{ + if (txa_dev_id_array == NULL) { + int i; + + txa_dev_id_array = txa_memzone_array_get("txa_adapter_array", + sizeof(int), + RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE); + if (txa_dev_id_array == NULL) + return -ENOMEM; + + for (i = 0; i < RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE; i++) + txa_dev_id_array[i] = TXA_INVALID_DEV_ID; + } + + return 0; +} + +static int +txa_init(void) +{ + return txa_dev_id_array_init(); +} + +static int +txa_service_data_init(void) +{ + if (txa_service_data_array == NULL) { + txa_service_data_array = + txa_memzone_array_get("txa_service_data_array", + sizeof(int), + RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE); + if (txa_service_data_array == NULL) + return -ENOMEM; + } + + return 0; +} + +static inline struct txa_service_data * +txa_service_id_to_data(uint8_t id) +{ + return txa_service_data_array[id]; +} + +static inline struct txa_service_queue_info * +txa_service_queue(struct txa_service_data *txa, uint16_t port_id, + uint16_t tx_queue_id) +{ + struct txa_service_queue_info *tqi; + + if (unlikely(txa->txa_ethdev == NULL || txa->dev_count < port_id + 1)) + return NULL; + + 
tqi = txa->txa_ethdev[port_id].queues; + + return likely(tqi != NULL) ? tqi + tx_queue_id : NULL; +} + +static int +txa_service_conf_cb(uint8_t __rte_unused id, uint8_t dev_id, + struct rte_event_eth_tx_adapter_conf *conf, void *arg) +{ + int ret; + struct rte_eventdev *dev; + struct rte_event_port_conf *pc; + struct rte_event_dev_config dev_conf; + int started; + uint8_t port_id; + + pc = arg; + dev = &rte_eventdevs[dev_id]; + dev_conf = dev->data->dev_conf; + + started = dev->data->dev_started; + if (started) + rte_event_dev_stop(dev_id); + + port_id = dev_conf.nb_event_ports; + dev_conf.nb_event_ports += 1; + + ret = rte_event_dev_configure(dev_id, &dev_conf); + if (ret) { + RTE_EDEV_LOG_ERR("failed to configure event dev %u", + dev_id); + if (started) { + if (rte_event_dev_start(dev_id)) + return -EIO; + } + return ret; + } + + pc->disable_implicit_release = 0; + ret = rte_event_port_setup(dev_id, port_id, pc); + if (ret) { + RTE_EDEV_LOG_ERR("failed to setup event port %u\n", + port_id); + if (started) { + if (rte_event_dev_start(dev_id)) + return -EIO; + } + return ret; + } + + conf->event_port_id = port_id; + conf->max_nb_tx = TXA_MAX_NB_TX; + if (started) + ret = rte_event_dev_start(dev_id); + return ret; +} + +static int +txa_service_ethdev_alloc(struct txa_service_data *txa) +{ + struct txa_service_ethdev *txa_ethdev; + uint16_t i, dev_count; + + dev_count = rte_eth_dev_count_avail(); + if (txa->txa_ethdev && dev_count == txa->dev_count) + return 0; + + txa_ethdev = rte_zmalloc_socket(txa->mem_name, + dev_count * sizeof(*txa_ethdev), + 0, + txa->socket_id); + if (txa_ethdev == NULL) { + RTE_EDEV_LOG_ERR("Failed to alloc txa::txa_ethdev "); + return -ENOMEM; + } + + if (txa->dev_count) + memcpy(txa_ethdev, txa->txa_ethdev, + txa->dev_count * sizeof(*txa_ethdev)); + + RTE_ETH_FOREACH_DEV(i) { + if (i == dev_count) + break; + txa_ethdev[i].dev = &rte_eth_devices[i]; + } + + txa->txa_ethdev = txa_ethdev; + txa->dev_count = dev_count; + return 0; +} + +static int +txa_service_queue_array_alloc(struct txa_service_data *txa, + uint16_t port_id) +{ + struct txa_service_queue_info *tqi; + uint16_t nb_queue; + int ret; + + ret = txa_service_ethdev_alloc(txa); + if (ret != 0) + return ret; + + if (txa->txa_ethdev[port_id].queues) + return 0; + + nb_queue = txa->txa_ethdev[port_id].dev->data->nb_tx_queues; + tqi = rte_zmalloc_socket(txa->mem_name, + nb_queue * + sizeof(struct txa_service_queue_info), 0, + txa->socket_id); + if (tqi == NULL) + return -ENOMEM; + txa->txa_ethdev[port_id].queues = tqi; + return 0; +} + +static void +txa_service_queue_array_free(struct txa_service_data *txa, + uint16_t port_id) +{ + struct txa_service_ethdev *txa_ethdev; + struct txa_service_queue_info *tqi; + + txa_ethdev = &txa->txa_ethdev[port_id]; + if (txa->txa_ethdev == NULL || txa_ethdev->nb_queues != 0) + return; + + tqi = txa_ethdev->queues; + txa_ethdev->queues = NULL; + rte_free(tqi); + + if (txa->nb_queues == 0) { + rte_free(txa->txa_ethdev); + txa->txa_ethdev = NULL; + } +} + +static void +txa_service_unregister(struct txa_service_data *txa) +{ + if (txa->service_id != TXA_INVALID_SERVICE_ID) { + rte_service_component_runstate_set(txa->service_id, 0); + while (rte_service_may_be_active(txa->service_id)) + rte_pause(); + rte_service_component_unregister(txa->service_id); + } + txa->service_id = TXA_INVALID_SERVICE_ID; +} + +static int +txa_service_register(struct txa_service_data *txa) +{ + int ret; + struct rte_service_spec service; + struct rte_event_eth_tx_adapter_conf conf; + + if (txa->service_id 
!= TXA_INVALID_SERVICE_ID) + return 0; + + memset(&service, 0, sizeof(service)); + snprintf(service.name, TXA_SERVICE_NAME_LEN, "txa_%d", txa->id); + service.socket_id = txa->socket_id; + service.callback = txa_service_func; + service.callback_userdata = txa; + service.capabilities = RTE_SERVICE_CAP_MT_SAFE; + ret = rte_service_component_register(&service, + (uint32_t *)&txa->service_id); + if (ret) { + RTE_EDEV_LOG_ERR("failed to register service %s err = %" + PRId32, service.name, ret); + return ret; + } + + ret = txa->conf_cb(txa->id, txa->eventdev_id, &conf, txa->conf_arg); + if (ret) { + txa_service_unregister(txa); + return ret; + } + + rte_service_component_runstate_set(txa->service_id, 1); + txa->port_id = conf.event_port_id; + txa->max_nb_tx = conf.max_nb_tx; + return 0; +} + +static struct rte_eth_dev_tx_buffer * +txa_service_tx_buf_alloc(struct txa_service_data *txa, + const struct rte_eth_dev *dev) +{ + struct rte_eth_dev_tx_buffer *tb; + uint16_t port_id; + + port_id = dev->data->port_id; + tb = rte_zmalloc_socket(txa->mem_name, + RTE_ETH_TX_BUFFER_SIZE(TXA_BATCH_SIZE), + 0, + rte_eth_dev_socket_id(port_id)); + if (tb == NULL) + RTE_EDEV_LOG_ERR("Failed to allocate memory for tx buffer"); + return tb; +} + +static int +txa_service_is_queue_added(struct txa_service_data *txa, + const struct rte_eth_dev *dev, + uint16_t tx_queue_id) +{ + struct txa_service_queue_info *tqi; + + tqi = txa_service_queue(txa, dev->data->port_id, tx_queue_id); + return tqi && tqi->added; +} + +static int +txa_service_ctrl(uint8_t id, int start) +{ + int ret; + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + if (txa->service_id == TXA_INVALID_SERVICE_ID) + return 0; + + ret = rte_service_runstate_set(txa->service_id, start); + if (ret == 0 && !start) { + while (rte_service_may_be_active(txa->service_id)) + rte_pause(); + } + return ret; +} + +static void +txa_service_buffer_retry(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata) +{ + struct txa_retry *tr; + struct txa_service_data *data; + struct rte_event_eth_tx_adapter_stats *stats; + uint16_t sent = 0; + unsigned int retry = 0; + uint16_t i, n; + + tr = (struct txa_retry *)(uintptr_t)userdata; + data = txa_service_id_to_data(tr->id); + stats = &data->stats; + + do { + n = rte_eth_tx_burst(tr->port_id, tr->tx_queue, + &pkts[sent], unsent - sent); + + sent += n; + } while (sent != unsent && retry++ < TXA_RETRY_CNT); + + for (i = sent; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); + + stats->tx_retry += retry; + stats->tx_packets += sent; + stats->tx_dropped += unsent - sent; +} + +static void +txa_service_tx(struct txa_service_data *txa, struct rte_event *ev, + uint32_t n) +{ + uint32_t i; + uint16_t nb_tx; + struct rte_event_eth_tx_adapter_stats *stats; + + stats = &txa->stats; + + nb_tx = 0; + for (i = 0; i < n; i++) { + struct rte_mbuf *m; + uint16_t port; + uint16_t queue; + struct txa_service_queue_info *tqi; + + m = ev[i].mbuf; + port = m->port; + queue = rte_event_eth_tx_adapter_txq_get(m); + + tqi = txa_service_queue(txa, port, queue); + if (unlikely(tqi == NULL || !tqi->added)) { + rte_pktmbuf_free(m); + continue; + } + + nb_tx += rte_eth_tx_buffer(port, queue, tqi->tx_buf, m); + } + + stats->tx_packets += nb_tx; +} + +static int32_t +txa_service_func(void *args) +{ + struct txa_service_data *txa = args; + uint8_t dev_id; + uint8_t port; + uint16_t n; + uint32_t nb_tx, max_nb_tx; + struct rte_event ev[TXA_BATCH_SIZE]; + + dev_id = txa->eventdev_id; + max_nb_tx = txa->max_nb_tx; + port = txa->port_id; + + if 
(txa->nb_queues == 0) + return 0; + + if (!rte_spinlock_trylock(&txa->tx_lock)) + return 0; + + for (nb_tx = 0; nb_tx < max_nb_tx; nb_tx += n) { + + n = rte_event_dequeue_burst(dev_id, port, ev, RTE_DIM(ev), 0); + if (!n) + break; + txa_service_tx(txa, ev, n); + } + + if ((txa->loop_cnt++ & (TXA_FLUSH_THRESHOLD - 1)) == 0) { + + struct txa_service_ethdev *tdi; + struct txa_service_queue_info *tqi; + struct rte_eth_dev *dev; + uint16_t i; + + tdi = txa->txa_ethdev; + nb_tx = 0; + + RTE_ETH_FOREACH_DEV(i) { + uint16_t q; + + if (i == txa->dev_count) + break; + + dev = tdi[i].dev; + if (tdi[i].nb_queues == 0) + continue; + for (q = 0; q < dev->data->nb_tx_queues; q++) { + + tqi = txa_service_queue(txa, i, q); + if (unlikely(tqi == NULL || !tqi->added)) + continue; + + nb_tx += rte_eth_tx_buffer_flush(i, q, + tqi->tx_buf); + } + } + + txa->stats.tx_packets += nb_tx; + } + rte_spinlock_unlock(&txa->tx_lock); + return 0; +} + +static int +txa_service_adapter_create(uint8_t id, struct rte_eventdev *dev, + struct rte_event_port_conf *port_conf) +{ + struct txa_service_data *txa; + struct rte_event_port_conf *cb_conf; + int ret; + + cb_conf = rte_malloc(NULL, sizeof(*cb_conf), 0); + if (cb_conf == NULL) + return -ENOMEM; + + *cb_conf = *port_conf; + ret = txa_service_adapter_create_ext(id, dev, txa_service_conf_cb, + cb_conf); + if (ret) { + rte_free(cb_conf); + return ret; + } + + txa = txa_service_id_to_data(id); + txa->conf_free = 1; + return ret; +} + +static int +txa_service_adapter_create_ext(uint8_t id, struct rte_eventdev *dev, + rte_event_eth_tx_adapter_conf_cb conf_cb, + void *conf_arg) +{ + struct txa_service_data *txa; + int socket_id; + char mem_name[TXA_SERVICE_NAME_LEN]; + int ret; + + if (conf_cb == NULL) + return -EINVAL; + + socket_id = dev->data->socket_id; + snprintf(mem_name, TXA_MEM_NAME_LEN, + "rte_event_eth_txa_%d", + id); + + ret = txa_service_data_init(); + if (ret != 0) + return ret; + + txa = rte_zmalloc_socket(mem_name, + sizeof(*txa), + RTE_CACHE_LINE_SIZE, socket_id); + if (txa == NULL) { + RTE_EDEV_LOG_ERR("failed to get mem for tx adapter"); + return -ENOMEM; + } + + txa->id = id; + txa->eventdev_id = dev->data->dev_id; + txa->socket_id = socket_id; + strncpy(txa->mem_name, mem_name, TXA_SERVICE_NAME_LEN); + txa->conf_cb = conf_cb; + txa->conf_arg = conf_arg; + txa->service_id = TXA_INVALID_SERVICE_ID; + rte_spinlock_init(&txa->tx_lock); + txa_service_data_array[id] = txa; + + return 0; +} + +static int +txa_service_event_port_get(uint8_t id, uint8_t *port) +{ + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + if (txa->service_id == TXA_INVALID_SERVICE_ID) + return -ENODEV; + + *port = txa->port_id; + return 0; +} + +static int +txa_service_adapter_free(uint8_t id) +{ + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + if (txa->nb_queues) { + RTE_EDEV_LOG_ERR("%" PRIu16 " Tx queues not deleted", + txa->nb_queues); + return -EBUSY; + } + + if (txa->conf_free) + rte_free(txa->conf_arg); + rte_free(txa); + return 0; +} + +static int +txa_service_queue_add(uint8_t id, + __rte_unused struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct txa_service_data *txa; + struct txa_service_ethdev *tdi; + struct txa_service_queue_info *tqi; + struct rte_eth_dev_tx_buffer *tb; + struct txa_retry *txa_retry; + int ret; + + txa = txa_service_id_to_data(id); + + if (tx_queue_id == -1) { + int nb_queues; + uint16_t i, j; + uint16_t *qdone; + + nb_queues = eth_dev->data->nb_tx_queues; + if 
(txa->dev_count > eth_dev->data->port_id) { + tdi = &txa->txa_ethdev[eth_dev->data->port_id]; + nb_queues -= tdi->nb_queues; + } + + qdone = rte_zmalloc(txa->mem_name, + nb_queues * sizeof(*qdone), 0); + j = 0; + for (i = 0; i < nb_queues; i++) { + if (txa_service_is_queue_added(txa, eth_dev, i)) + continue; + ret = txa_service_queue_add(id, dev, eth_dev, i); + if (ret == 0) + qdone[j++] = i; + else + break; + } + + if (i != nb_queues) { + for (i = 0; i < j; i++) + txa_service_queue_del(id, eth_dev, qdone[i]); + } + rte_free(qdone); + return ret; + } + + ret = txa_service_register(txa); + if (ret) + return ret; + + rte_spinlock_lock(&txa->tx_lock); + + if (txa_service_is_queue_added(txa, eth_dev, tx_queue_id)) { + rte_spinlock_unlock(&txa->tx_lock); + return 0; + } + + ret = txa_service_queue_array_alloc(txa, eth_dev->data->port_id); + if (ret) + goto err_unlock; + + tb = txa_service_tx_buf_alloc(txa, eth_dev); + if (tb == NULL) + goto err_unlock; + + tdi = &txa->txa_ethdev[eth_dev->data->port_id]; + tqi = txa_service_queue(txa, eth_dev->data->port_id, tx_queue_id); + + txa_retry = &tqi->txa_retry; + txa_retry->id = txa->id; + txa_retry->port_id = eth_dev->data->port_id; + txa_retry->tx_queue = tx_queue_id; + + rte_eth_tx_buffer_init(tb, TXA_BATCH_SIZE); + rte_eth_tx_buffer_set_err_callback(tb, + txa_service_buffer_retry, txa_retry); + + tqi->tx_buf = tb; + tqi->added = 1; + tdi->nb_queues++; + txa->nb_queues++; + +err_unlock: + if (txa->nb_queues == 0) { + txa_service_queue_array_free(txa, + eth_dev->data->port_id); + txa_service_unregister(txa); + } + + rte_spinlock_unlock(&txa->tx_lock); + return 0; +} + +static int +txa_service_queue_del(uint8_t id, + const struct rte_eth_dev *dev, + int32_t tx_queue_id) +{ + struct txa_service_data *txa; + struct txa_service_queue_info *tqi; + struct rte_eth_dev_tx_buffer *tb; + uint16_t port_id; + + if (tx_queue_id == -1) { + uint16_t i; + int ret = -1; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + ret = txa_service_queue_del(id, dev, i); + if (ret != 0) + break; + } + return ret; + } + + txa = txa_service_id_to_data(id); + port_id = dev->data->port_id; + + tqi = txa_service_queue(txa, port_id, tx_queue_id); + if (tqi == NULL || !tqi->added) + return 0; + + tb = tqi->tx_buf; + tqi->added = 0; + tqi->tx_buf = NULL; + rte_free(tb); + txa->nb_queues--; + txa->txa_ethdev[port_id].nb_queues--; + + txa_service_queue_array_free(txa, port_id); + return 0; +} + +static int +txa_service_id_get(uint8_t id, uint32_t *service_id) +{ + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + if (txa->service_id == TXA_INVALID_SERVICE_ID) + return -ESRCH; + + if (service_id == NULL) + return -EINVAL; + + *service_id = txa->service_id; + return 0; +} + +static int +txa_service_start(uint8_t id) +{ + return txa_service_ctrl(id, 1); +} + +static int +txa_service_stats_get(uint8_t id, + struct rte_event_eth_tx_adapter_stats *stats) +{ + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + *stats = txa->stats; + return 0; +} + +static int +txa_service_stats_reset(uint8_t id) +{ + struct txa_service_data *txa; + + txa = txa_service_id_to_data(id); + memset(&txa->stats, 0, sizeof(txa->stats)); + return 0; +} + +static int +txa_service_stop(uint8_t id) +{ + return txa_service_ctrl(id, 0); +} + + +int __rte_experimental +rte_event_eth_tx_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_conf) +{ + struct rte_eventdev *dev; + int ret; + + if (port_conf == NULL) + return -EINVAL; + + 
RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + + dev = &rte_eventdevs[dev_id]; + + ret = txa_init(); + if (ret != 0) + return ret; + + if (txa_adapter_exist(id)) + return -EEXIST; + + txa_dev_id_array[id] = dev_id; + if (txa_dev_adapter_create(id)) + ret = txa_dev_adapter_create(id)(id, dev); + + if (ret != 0) { + txa_dev_id_array[id] = TXA_INVALID_DEV_ID; + return ret; + } + + ret = txa_service_adapter_create(id, dev, port_conf); + if (ret != 0) { + if (txa_dev_adapter_free(id)) + txa_dev_adapter_free(id)(id, dev); + txa_dev_id_array[id] = TXA_INVALID_DEV_ID; + return ret; + } + + txa_dev_id_array[id] = dev_id; + return 0; +} + +int __rte_experimental +rte_event_eth_tx_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_eth_tx_adapter_conf_cb conf_cb, + void *conf_arg) +{ + struct rte_eventdev *dev; + int ret; + + RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL); + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + + ret = txa_init(); + if (ret != 0) + return ret; + + if (txa_adapter_exist(id)) + return -EINVAL; + + dev = &rte_eventdevs[dev_id]; + + txa_dev_id_array[id] = dev_id; + if (txa_dev_adapter_create_ext(id)) + ret = txa_dev_adapter_create_ext(id)(id, dev); + + if (ret != 0) { + txa_dev_id_array[id] = TXA_INVALID_DEV_ID; + return ret; + } + + ret = txa_service_adapter_create_ext(id, dev, conf_cb, conf_arg); + if (ret != 0) { + if (txa_dev_adapter_free(id)) + txa_dev_adapter_free(id)(id, dev); + txa_dev_id_array[id] = TXA_INVALID_DEV_ID; + return ret; + } + + txa_dev_id_array[id] = dev_id; + return 0; +} + + +int __rte_experimental +rte_event_eth_tx_adapter_event_port_get(uint8_t id, uint8_t *event_port_id) +{ + TXA_CHECK_OR_ERR_RET(id); + + return txa_service_event_port_get(id, event_port_id); +} + +int __rte_experimental +rte_event_eth_tx_adapter_free(uint8_t id) +{ + int ret; + + TXA_CHECK_OR_ERR_RET(id); + + ret = txa_dev_adapter_free(id) ? + txa_dev_adapter_free(id)(id, txa_evdev(id)) : + 0; + + if (ret == 0) + ret = txa_service_adapter_free(id); + txa_dev_id_array[id] = TXA_INVALID_DEV_ID; + + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_queue_add(uint8_t id, + uint16_t eth_dev_id, + int32_t queue) +{ + struct rte_eth_dev *eth_dev; + int ret; + uint32_t caps; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); + TXA_CHECK_OR_ERR_RET(id); + + eth_dev = &rte_eth_devices[eth_dev_id]; + if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) { + RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, + (uint16_t)queue); + return -EINVAL; + } + + caps = 0; + if (txa_dev_caps_get(id)) + txa_dev_caps_get(id)(txa_evdev(id), eth_dev, &caps); + + if (caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT) + ret = txa_dev_queue_add(id) ? 
+ txa_dev_queue_add(id)(id, + txa_evdev(id), + eth_dev, + queue) : 0; + else + ret = txa_service_queue_add(id, txa_evdev(id), eth_dev, queue); + + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_queue_del(uint8_t id, + uint16_t eth_dev_id, + int32_t queue) +{ + struct rte_eth_dev *eth_dev; + int ret; + uint32_t caps; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL); + TXA_CHECK_OR_ERR_RET(id); + + eth_dev = &rte_eth_devices[eth_dev_id]; + if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) { + RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, + (uint16_t)queue); + return -EINVAL; + } + + caps = 0; + + if (txa_dev_caps_get(id)) + txa_dev_caps_get(id)(txa_evdev(id), eth_dev, &caps); + + if (caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT) + ret = txa_dev_queue_del(id) ? + txa_dev_queue_del(id)(id, txa_evdev(id), + eth_dev, + queue) : 0; + else + ret = txa_service_queue_del(id, eth_dev, queue); + + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_service_id_get(uint8_t id, uint32_t *service_id) +{ + TXA_CHECK_OR_ERR_RET(id); + + return txa_service_id_get(id, service_id); +} + +int __rte_experimental +rte_event_eth_tx_adapter_start(uint8_t id) +{ + int ret; + + TXA_CHECK_OR_ERR_RET(id); + + ret = txa_dev_start(id) ? txa_dev_start(id)(id, txa_evdev(id)) : 0; + if (ret == 0) + ret = txa_service_start(id); + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_stats_get(uint8_t id, + struct rte_event_eth_tx_adapter_stats *stats) +{ + int ret; + + TXA_CHECK_OR_ERR_RET(id); + + if (stats == NULL) + return -EINVAL; + + *stats = (struct rte_event_eth_tx_adapter_stats){0}; + + ret = txa_dev_stats_get(id) ? + txa_dev_stats_get(id)(id, txa_evdev(id), stats) : 0; + + if (ret == 0 && txa_service_id_get(id, NULL) != ESRCH) { + if (txa_dev_stats_get(id)) { + struct rte_event_eth_tx_adapter_stats service_stats; + + ret = txa_service_stats_get(id, &service_stats); + if (ret == 0) { + stats->tx_retry += service_stats.tx_retry; + stats->tx_packets += service_stats.tx_packets; + stats->tx_dropped += service_stats.tx_dropped; + } + } else + ret = txa_service_stats_get(id, stats); + } + + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_stats_reset(uint8_t id) +{ + int ret; + + TXA_CHECK_OR_ERR_RET(id); + + ret = txa_dev_stats_reset(id) ? + txa_dev_stats_reset(id)(id, txa_evdev(id)) : 0; + if (ret == 0) + ret = txa_service_stats_reset(id); + return ret; +} + +int __rte_experimental +rte_event_eth_tx_adapter_stop(uint8_t id) +{ + int ret; + + TXA_CHECK_OR_ERR_RET(id); + + ret = txa_dev_stop(id) ? txa_dev_stop(id)(id, txa_evdev(id)) : 0; + if (ret == 0) + ret = txa_service_stop(id); + return ret; +} diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.h b/lib/librte_eventdev/rte_event_eth_tx_adapter.h new file mode 100644 index 00000000..81456d4a --- /dev/null +++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.h @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation. + */ + +#ifndef _RTE_EVENT_ETH_TX_ADAPTER_ +#define _RTE_EVENT_ETH_TX_ADAPTER_ + +/** + * @file + * + * RTE Event Ethernet Tx Adapter + * + * The event ethernet Tx adapter provides configuration and data path APIs + * for the ethernet transmit stage of an event driven packet processing + * application. These APIs abstract the implementation of the transmit stage + * and allow the application to use eventdev PMD support or a common + * implementation. 
+ * + * In the common implementation, the application enqueues mbufs to the adapter + * which runs as a rte_service function. The service function dequeues events + * from its event port and transmits the mbufs referenced by these events. + * + * The ethernet Tx event adapter APIs are: + * + * - rte_event_eth_tx_adapter_create() + * - rte_event_eth_tx_adapter_create_ext() + * - rte_event_eth_tx_adapter_free() + * - rte_event_eth_tx_adapter_start() + * - rte_event_eth_tx_adapter_stop() + * - rte_event_eth_tx_adapter_queue_add() + * - rte_event_eth_tx_adapter_queue_del() + * - rte_event_eth_tx_adapter_stats_get() + * - rte_event_eth_tx_adapter_stats_reset() + * - rte_event_eth_tx_adapter_enqueue() + * - rte_event_eth_tx_adapter_event_port_get() + * - rte_event_eth_tx_adapter_service_id_get() + * + * The application creates the adapter using + * rte_event_eth_tx_adapter_create() or rte_event_eth_tx_adapter_create_ext(). + * + * The adapter will use the common implementation when the eventdev PMD + * does not have the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability. + * The common implementation uses an event port that is created using the port + * configuration parameter passed to rte_event_eth_tx_adapter_create(). The + * application can get the port identifier using + * rte_event_eth_tx_adapter_event_port_get() and must link an event queue to + * this port. + * + * If the eventdev PMD has the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT + * flags set, Tx adapter events should be enqueued using the + * rte_event_eth_tx_adapter_enqueue() function, else the application should + * use rte_event_enqueue_burst(). + * + * Transmit queues can be added and deleted from the adapter using + * rte_event_eth_tx_adapter_queue_add()/del() APIs respectively. + * + * The application can start and stop the adapter using the + * rte_event_eth_tx_adapter_start/stop() calls. + * + * The common adapter implementation uses an EAL service function as described + * before and its execution is controlled using the rte_service APIs. The + * rte_event_eth_tx_adapter_service_id_get() + * function can be used to retrieve the adapter's service function ID. + * + * The ethernet port and transmit queue index to transmit the mbuf on are + * specified using the mbuf port and the higher 16 bits of + * struct rte_mbuf::hash::sched:hi. The application should use the + * rte_event_eth_tx_adapter_txq_set() and rte_event_eth_tx_adapter_txq_get() + * functions to access the transmit queue index since it is expected that the + * transmit queue will be eventually defined within struct rte_mbuf and using + * these macros will help with minimizing application impact due to + * a change in how the transmit queue index is specified. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#include <rte_mbuf.h> + +#include "rte_eventdev.h" + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Adapter configuration structure + * + * @see rte_event_eth_tx_adapter_create_ext + * @see rte_event_eth_tx_adapter_conf_cb + */ +struct rte_event_eth_tx_adapter_conf { + uint8_t event_port_id; + /**< Event port identifier, the adapter service function dequeues mbuf + * events from this port. + * @see RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT + */ + uint32_t max_nb_tx; + /**< The adapter can return early if it has processed at least + * max_nb_tx mbufs. This isn't treated as a requirement; batching may + * cause the adapter to process more than max_nb_tx mbufs. 
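Putting the control-path calls above together, a typical setup with the common (service based) implementation might look as follows (a sketch only; dev_id, eth_port and port_conf are assumed to be configured elsewhere, and error handling is omitted):

	uint8_t txa_id = 0;
	uint8_t txa_port;
	const uint8_t tx_ev_queue = 0;	/* event queue feeding the Tx stage */

	rte_event_eth_tx_adapter_create(txa_id, dev_id, &port_conf);
	rte_event_eth_tx_adapter_queue_add(txa_id, eth_port, -1); /* all queues */
	/* Common implementation: link an event queue to the adapter's port. */
	rte_event_eth_tx_adapter_event_port_get(txa_id, &txa_port);
	rte_event_port_link(dev_id, txa_port, &tx_ev_queue, NULL, 1);
	rte_event_eth_tx_adapter_start(txa_id);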
+ */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Function type used for adapter configuration callback. The callback is + * used to fill in members of the struct rte_event_eth_tx_adapter_conf, this + * callback is invoked when creating a RTE service function based + * adapter implementation. + * + * @param id + * Adapter identifier. + * @param dev_id + * Event device identifier. + * @param [out] conf + * Structure that needs to be populated by this callback. + * @param arg + * Argument to the callback. This is the same as the conf_arg passed to the + * rte_event_eth_tx_adapter_create_ext(). + * + * @return + * - 0: Success + * - <0: Error code on failure + */ +typedef int (*rte_event_eth_tx_adapter_conf_cb) (uint8_t id, uint8_t dev_id, + struct rte_event_eth_tx_adapter_conf *conf, + void *arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * A structure used to retrieve statistics for an ethernet Tx adapter instance. + */ +struct rte_event_eth_tx_adapter_stats { + uint64_t tx_retry; + /**< Number of transmit retries */ + uint64_t tx_packets; + /**< Number of packets transmitted */ + uint64_t tx_dropped; + /**< Number of packets dropped */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new ethernet Tx adapter with the specified identifier. + * + * @param id + * The identifier of the ethernet Tx adapter. + * @param dev_id + * The event device identifier. + * @param port_config + * Event port configuration, the adapter uses this configuration to + * create an event port if needed. + * @return + * - 0: Success + * - <0: Error code on failure + */ +int __rte_experimental +rte_event_eth_tx_adapter_create(uint8_t id, uint8_t dev_id, + struct rte_event_port_conf *port_config); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Create a new ethernet Tx adapter with the specified identifier. + * + * @param id + * The identifier of the ethernet Tx adapter. + * @param dev_id + * The event device identifier. + * @param conf_cb + * Callback function that initializes members of the + * struct rte_event_eth_tx_adapter_conf struct passed into + * it. + * @param conf_arg + * Argument that is passed to the conf_cb function. + * @return + * - 0: Success + * - <0: Error code on failure + */ +int __rte_experimental +rte_event_eth_tx_adapter_create_ext(uint8_t id, uint8_t dev_id, + rte_event_eth_tx_adapter_conf_cb conf_cb, + void *conf_arg); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Free an ethernet Tx adapter + * + * @param id + * Adapter identifier. + * @return + * - 0: Success + * - <0: Error code on failure, If the adapter still has Tx queues + * added to it, the function returns -EBUSY. + */ +int __rte_experimental +rte_event_eth_tx_adapter_free(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Start ethernet Tx adapter + * + * @param id + * Adapter identifier. + * @return + * - 0: Success, Adapter started correctly. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_start(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Stop ethernet Tx adapter + * + * @param id + * Adapter identifier. + * @return + * - 0: Success. + * - <0: Error code on failure. 
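For rte_event_eth_tx_adapter_create_ext(), the configuration callback only has to fill in the conf structure; a minimal sketch (illustrative; the event port is assumed to have been set up beforehand and its identifier passed through conf_arg):

	static int
	txa_conf_cb(uint8_t id __rte_unused, uint8_t dev_id __rte_unused,
		    struct rte_event_eth_tx_adapter_conf *conf, void *arg)
	{
		conf->event_port_id = *(uint8_t *)arg; /* pre-configured port */
		conf->max_nb_tx = 128;
		return 0;
	}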
+ */ +int __rte_experimental +rte_event_eth_tx_adapter_stop(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Add a Tx queue to the adapter. + * A queue value of -1 is used to indicate all + * queues within the device. + * + * @param id + * Adapter identifier. + * @param eth_dev_id + * Ethernet Port Identifier. + * @param queue + * Tx queue index. + * @return + * - 0: Success, Queues added successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_queue_add(uint8_t id, + uint16_t eth_dev_id, + int32_t queue); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Delete a Tx queue from the adapter. + * A queue value of -1 is used to indicate all + * queues within the device, that have been added to this + * adapter. + * + * @param id + * Adapter identifier. + * @param eth_dev_id + * Ethernet Port Identifier. + * @param queue + * Tx queue index. + * @return + * - 0: Success, Queues deleted successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_queue_del(uint8_t id, + uint16_t eth_dev_id, + int32_t queue); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Set Tx queue in the mbuf. This queue is used by the adapter + * to transmit the mbuf. + * + * @param pkt + * Pointer to the mbuf. + * @param queue + * Tx queue index. + */ +static __rte_always_inline void __rte_experimental +rte_event_eth_tx_adapter_txq_set(struct rte_mbuf *pkt, uint16_t queue) +{ + uint16_t *p = (uint16_t *)&pkt->hash.sched.hi; + p[1] = queue; +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve Tx queue from the mbuf. + * + * @param pkt + * Pointer to the mbuf. + * @return + * Tx queue identifier. + * + * @see rte_event_eth_tx_adapter_txq_set() + */ +static __rte_always_inline uint16_t __rte_experimental +rte_event_eth_tx_adapter_txq_get(struct rte_mbuf *pkt) +{ + uint16_t *p = (uint16_t *)&pkt->hash.sched.hi; + return p[1]; +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the adapter event port. The adapter creates an event port if + * the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT is not set in the + * ethernet Tx capabilities of the event device. + * + * @param id + * Adapter Identifier. + * @param[out] event_port_id + * Event port pointer. + * @return + * - 0: Success. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_event_port_get(uint8_t id, uint8_t *event_port_id); + +/** + * Enqueue a burst of events objects or an event object supplied in *rte_event* + * structure on an event device designated by its *dev_id* through the event + * port specified by *port_id*. This function is supported if the eventdev PMD + * has the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability flag set. + * + * The *nb_events* parameter is the number of event objects to enqueue which are + * supplied in the *ev* array of *rte_event* structure. + * + * The rte_event_eth_tx_adapter_enqueue() function returns the number of + * events objects it actually enqueued. A return value equal to *nb_events* + * means that all event objects have been enqueued. + * + * @param dev_id + * The identifier of the device. + * @param port_id + * The identifier of the event port. 
+ * @param ev + * Points to an array of *nb_events* objects of type *rte_event* structure + * which contain the event object enqueue operations to be processed. + * @param nb_events + * The number of event objects to enqueue, typically number of + * rte_event_port_enqueue_depth() available for this port. + * + * @return + * The number of event objects actually enqueued on the event device. The + * return value can be less than the value of the *nb_events* parameter when + * the event devices queue is full or if invalid parameters are specified in a + * *rte_event*. If the return value is less than *nb_events*, the remaining + * events at the end of ev[] are not consumed and the caller has to take care + * of them, and rte_errno is set accordingly. Possible errno values include: + * - -EINVAL The port ID is invalid, device ID is invalid, an event's queue + * ID is invalid, or an event's sched type doesn't match the + * capabilities of the destination queue. + * - -ENOSPC The event port was backpressured and unable to enqueue + * one or more events. This error code is only applicable to + * closed systems. + */ +static inline uint16_t __rte_experimental +rte_event_eth_tx_adapter_enqueue(uint8_t dev_id, + uint8_t port_id, + struct rte_event ev[], + uint16_t nb_events) +{ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + +#ifdef RTE_LIBRTE_EVENTDEV_DEBUG + if (dev_id >= RTE_EVENT_MAX_DEVS || + !rte_eventdevs[dev_id].attached) { + rte_errno = -EINVAL; + return 0; + } + + if (port_id >= dev->data->nb_ports) { + rte_errno = -EINVAL; + return 0; + } +#endif + return dev->txa_enqueue(dev->data->ports[port_id], ev, nb_events); +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve statistics for an adapter + * + * @param id + * Adapter identifier. + * @param [out] stats + * A pointer to structure used to retrieve statistics for an adapter. + * @return + * - 0: Success, statistics retrieved successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_stats_get(uint8_t id, + struct rte_event_eth_tx_adapter_stats *stats); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset statistics for an adapter. + * + * @param id + * Adapter identifier. + * @return + * - 0: Success, statistics reset successfully. + * - <0: Error code on failure. + */ +int __rte_experimental +rte_event_eth_tx_adapter_stats_reset(uint8_t id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Retrieve the service ID of an adapter. If the adapter doesn't use + * a rte_service function, this function returns -ESRCH. + * + * @param id + * Adapter identifier. + * @param [out] service_id + * A pointer to a uint32_t, to be filled in with the service id. + * @return + * - 0: Success + * - <0: Error code on failure, if the adapter doesn't use a rte_service + * function, this function returns -ESRCH. 
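Pulling the declarations above together, a minimal sketch of the common (service-function) case might look as follows. The adapter id, device and port identifiers, queue numbers and the wrapper function are illustrative placeholders, not part of this header, the header name is assumed from the include guard, and error checks are omitted for brevity:

#include <rte_eventdev.h>
#include <rte_event_eth_tx_adapter.h>

/* Illustrative only: create the adapter, link an event queue to the event
 * port it created, start it, and hand one mbuf to it via
 * rte_event_enqueue_burst(). The eventdev PMD is assumed to lack the
 * RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability.
 */
static int
txa_example(uint8_t dev_id, uint16_t eth_port, uint8_t app_port,
	    struct rte_event_port_conf *port_conf, struct rte_mbuf *m)
{
	const uint8_t id = 0;	/* adapter identifier, chosen by the app */
	uint8_t txa_port;
	uint8_t ev_queue = 0;	/* event queue feeding the adapter */
	struct rte_event ev = {0};

	rte_event_eth_tx_adapter_create(id, dev_id, port_conf);
	rte_event_eth_tx_adapter_queue_add(id, eth_port, -1); /* all Tx queues */
	rte_event_eth_tx_adapter_event_port_get(id, &txa_port);
	rte_event_port_link(dev_id, txa_port, &ev_queue, NULL, 1);
	rte_event_eth_tx_adapter_start(id);

	rte_event_eth_tx_adapter_txq_set(m, 0);	/* transmit on Tx queue 0 */
	m->port = eth_port;
	ev.queue_id = ev_queue;
	ev.op = RTE_EVENT_OP_NEW;
	ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.mbuf = m;
	return rte_event_enqueue_burst(dev_id, app_port, &ev, 1);
}

If the PMD did advertise the internal port capability, the last call would instead be rte_event_eth_tx_adapter_enqueue() on the application's own event port, as described in the file comment.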
+ */ +int __rte_experimental +rte_event_eth_tx_adapter_service_id_get(uint8_t id, uint32_t *service_id); + +#ifdef __cplusplus +} +#endif +#endif /* _RTE_EVENT_ETH_TX_ADAPTER_ */ diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index 801810ed..ebaf3087 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -35,22 +35,20 @@ #include "rte_eventdev.h" #include "rte_eventdev_pmd.h" -struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS]; +static struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS]; -struct rte_eventdev *rte_eventdevs = &rte_event_devices[0]; +struct rte_eventdev *rte_eventdevs = rte_event_devices; static struct rte_eventdev_global eventdev_globals = { .nb_devs = 0 }; -struct rte_eventdev_global *rte_eventdev_globals = &eventdev_globals; - /* Event dev north bound API implementation */ uint8_t rte_event_dev_count(void) { - return rte_eventdev_globals->nb_devs; + return eventdev_globals.nb_devs; } int @@ -62,7 +60,7 @@ rte_event_dev_get_dev_id(const char *name) if (!name) return -EINVAL; - for (i = 0; i < rte_eventdev_globals->nb_devs; i++) { + for (i = 0; i < eventdev_globals.nb_devs; i++) { cmp = (strncmp(rte_event_devices[i].data->name, name, RTE_EVENTDEV_NAME_MAX_LEN) == 0) || (rte_event_devices[i].dev ? (strncmp( @@ -109,7 +107,7 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info) } int -rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, +rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id, uint32_t *caps) { struct rte_eventdev *dev; @@ -175,6 +173,31 @@ rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id, (dev, cdev, caps) : -ENOTSUP; } +int __rte_experimental +rte_event_eth_tx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id, + uint32_t *caps) +{ + struct rte_eventdev *dev; + struct rte_eth_dev *eth_dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_port_id, -EINVAL); + + dev = &rte_eventdevs[dev_id]; + eth_dev = &rte_eth_devices[eth_port_id]; + + if (caps == NULL) + return -EINVAL; + + *caps = 0; + + return dev->dev_ops->eth_tx_adapter_caps_get ? + (*dev->dev_ops->eth_tx_adapter_caps_get)(dev, + eth_dev, + caps) + : 0; +} + static inline int rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues) { @@ -980,6 +1003,28 @@ rte_event_port_unlink(uint8_t dev_id, uint8_t port_id, return diag; } +int __rte_experimental +rte_event_port_unlinks_in_progress(uint8_t dev_id, uint8_t port_id) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_eventdevs[dev_id]; + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } + + /* Return 0 if the PMD does not implement unlinks in progress. + * This allows PMDs which handle unlink synchronously to not implement + * this function at all. 
+ */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_unlinks_in_progress, 0); + + return (*dev->dev_ops->port_unlinks_in_progress)(dev, + dev->data->ports[port_id]); +} + int rte_event_port_links_get(uint8_t dev_id, uint8_t port_id, uint8_t queues[], uint8_t priorities[]) @@ -1275,6 +1320,15 @@ rte_eventdev_find_free_device_index(void) return RTE_EVENT_MAX_DEVS; } +static uint16_t +rte_event_tx_adapter_enqueue(__rte_unused void *port, + __rte_unused struct rte_event ev[], + __rte_unused uint16_t nb_events) +{ + rte_errno = ENOTSUP; + return 0; +} + struct rte_eventdev * rte_event_pmd_allocate(const char *name, int socket_id) { @@ -1295,6 +1349,8 @@ rte_event_pmd_allocate(const char *name, int socket_id) eventdev = &rte_eventdevs[dev_id]; + eventdev->txa_enqueue = rte_event_tx_adapter_enqueue; + if (eventdev->data == NULL) { struct rte_eventdev_data *eventdev_data = NULL; diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index b6fd6ee7..d7eb69d1 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1112,7 +1112,7 @@ struct rte_event { * */ int -rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id, +rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id, uint32_t *caps); #define RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT (1ULL << 0) @@ -1186,6 +1186,32 @@ int __rte_experimental rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id, uint32_t *caps); +/* Ethdev Tx adapter capability bitmap flags */ +#define RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT 0x1 +/**< This flag is sent when the PMD supports a packet transmit callback + */ + +/** + * Retrieve the event device's eth Tx adapter capabilities + * + * @param dev_id + * The identifier of the device. + * + * @param eth_port_id + * The identifier of the ethernet device. + * + * @param[out] caps + * A pointer to memory filled with eth Tx adapter capabilities. + * + * @return + * - 0: Success, driver provides eth Tx adapter capabilities. + * - <0: Error code returned by the driver function. + * + */ +int __rte_experimental +rte_event_eth_tx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id, + uint32_t *caps); + struct rte_eventdev_ops; struct rte_eventdev; @@ -1204,6 +1230,10 @@ typedef uint16_t (*event_dequeue_burst_t)(void *port, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks); /**< @internal Dequeue burst of events from port of a device */ +typedef uint16_t (*event_tx_adapter_enqueue)(void *port, + struct rte_event ev[], uint16_t nb_events); +/**< @internal Enqueue burst of events on port of a device */ + #define RTE_EVENTDEV_NAME_MAX_LEN (64) /**< @internal Max length of name of event PMD */ @@ -1266,7 +1296,8 @@ struct rte_eventdev { /**< Pointer to PMD dequeue function. */ event_dequeue_burst_t dequeue_burst; /**< Pointer to PMD dequeue burst function. */ - + event_tx_adapter_enqueue txa_enqueue; + /**< Pointer to PMD eth Tx adapter enqueue function. */ struct rte_eventdev_data *data; /**< Pointer to device data */ struct rte_eventdev_ops *dev_ops; @@ -1656,12 +1687,13 @@ rte_event_port_link(uint8_t dev_id, uint8_t port_id, * event port designated by its *port_id* on the event device designated * by its *dev_id*. * - * The unlink establishment shall disable the event port *port_id* from - * receiving events from the specified event queue *queue_id* - * + * The unlink call issues an async request to disable the event port *port_id* + * from receiving events from the specified event queue *queue_id*. 
 * Event queue(s) to event port unlink establishment can be changed at runtime
 * without re-configuring the device.
 *
+ * @see rte_event_port_unlinks_in_progress() to poll for completed unlinks.
+ *
 * @param dev_id
 *   The identifier of the device.
 *
@@ -1679,22 +1711,48 @@ rte_event_port_link(uint8_t dev_id, uint8_t port_id,
 *   NULL.
 *
 * @return
- * The number of unlinks actually established. The return value can be less
+ * The number of unlinks successfully requested. The return value can be less
 * than the value of the *nb_unlinks* parameter when the implementation has the
 * limitation on specific queue to port unlink establishment or
 * if invalid parameters are specified.
 * If the return value is less than *nb_unlinks*, the remaining queues at the
- * end of queues[] are not established, and the caller has to take care of them.
+ * end of queues[] are not unlinked, and the caller has to take care of them.
 * If return value is less than *nb_unlinks* then implementation shall update
 * the rte_errno accordingly, Possible rte_errno values are
 * (-EINVAL) Invalid parameter
- *
 */
int
rte_event_port_unlink(uint8_t dev_id, uint8_t port_id,
		      uint8_t queues[], uint16_t nb_unlinks);

/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Returns the number of unlinks in progress.
+ *
+ * This function provides the application with a method to detect when an
+ * unlink has been completed by the implementation.
+ *
+ * @see rte_event_port_unlink() to issue unlink requests.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param port_id
+ *   Event port identifier to select port to check for unlinks in progress.
+ *
+ * @return
+ *   The number of unlinks that are in progress. A return of zero indicates that
+ *   there are no outstanding unlink requests. A positive return value indicates
+ *   the number of unlinks that are in progress, but are not yet complete.
+ *   A negative return value indicates an error; -EINVAL indicates an invalid
+ *   parameter passed for *dev_id* or *port_id*.
+ */
+int __rte_experimental
+rte_event_port_unlinks_in_progress(uint8_t dev_id, uint8_t port_id);
+
+/**
 * Retrieve the list of source event queues and its associated service priority
 * linked to the destination event port designated by its *port_id*
 * on the event device designated by its *dev_id*.
diff --git a/lib/librte_eventdev/rte_eventdev_pmd.h b/lib/librte_eventdev/rte_eventdev_pmd.h
index 3fbb4d2b..1a01326b 100644
--- a/lib/librte_eventdev/rte_eventdev_pmd.h
+++ b/lib/librte_eventdev/rte_eventdev_pmd.h
@@ -87,8 +87,6 @@ struct rte_eventdev_global {
 	uint8_t nb_devs;	/**< Number of devices found */
 };

-extern struct rte_eventdev_global *rte_eventdev_globals;
-/** Pointer to global event devices data structure. */
 extern struct rte_eventdev *rte_eventdevs;
 /** The pool of rte_eventdev structures. */

@@ -333,6 +331,23 @@ typedef int (*eventdev_port_unlink_t)(struct rte_eventdev *dev, void *port,
 		uint8_t queues[], uint16_t nb_unlinks);

 /**
+ * Unlinks in progress. Returns number of unlinks that the PMD is currently
+ * performing, but have not yet been completed.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param port
+ *   Event port pointer
+ *
+ * @return
+ *   Returns the number of in-progress unlinks. Zero is returned if none are
+ *   in progress.
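As context for the PMD callback documented above: on the application side, rte_event_port_unlink() now only issues an asynchronous request, so callers typically pair it with rte_event_port_unlinks_in_progress(). A minimal sketch of that pattern (identifiers are placeholders, error handling omitted):

#include <rte_eventdev.h>
#include <rte_pause.h>

/* Request an unlink and busy-wait until the PMD reports completion. */
static void
unlink_and_wait(uint8_t dev_id, uint8_t port_id, uint8_t queue_id)
{
	if (rte_event_port_unlink(dev_id, port_id, &queue_id, 1) != 1)
		return;	/* request not accepted, rte_errno holds the cause */

	while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
		rte_pause();	/* unlink still being processed by the PMD */
}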
+ */ +typedef int (*eventdev_port_unlinks_in_progress_t)(struct rte_eventdev *dev, + void *port); + +/** * Converts nanoseconds to *timeout_ticks* value for rte_event_dequeue() * * @param dev @@ -450,7 +465,7 @@ typedef int (*eventdev_eth_rx_adapter_caps_get_t) const struct rte_eth_dev *eth_dev, uint32_t *caps); -struct rte_event_eth_rx_adapter_queue_conf *queue_conf; +struct rte_event_eth_rx_adapter_queue_conf; /** * Retrieve the event device's timer adapter capabilities, as well as the ops @@ -575,7 +590,7 @@ typedef int (*eventdev_eth_rx_adapter_stop_t) (const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev); -struct rte_event_eth_rx_adapter_stats *stats; +struct rte_event_eth_rx_adapter_stats; /** * Retrieve ethernet Rx adapter statistics. @@ -789,6 +804,186 @@ typedef int (*eventdev_crypto_adapter_stats_reset) (const struct rte_eventdev *dev, const struct rte_cryptodev *cdev); +/** + * Retrieve the event device's eth Tx adapter capabilities. + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param[out] caps + * A pointer to memory filled with eth Tx adapter capabilities. + * + * @return + * - 0: Success, driver provides eth Tx adapter capabilities + * - <0: Error code returned by the driver function. + * + */ +typedef int (*eventdev_eth_tx_adapter_caps_get_t) + (const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps); + +/** + * Create adapter callback. + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @return + * - 0: Success. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_create_t)(uint8_t id, + const struct rte_eventdev *dev); + +/** + * Free adapter callback. + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @return + * - 0: Success. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_free_t)(uint8_t id, + const struct rte_eventdev *dev); + +/** + * Add a Tx queue to the adapter. + * A queue value of -1 is used to indicate all + * queues within the device. + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param tx_queue_id + * Transmt queue index + * + * @return + * - 0: Success. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_queue_add_t)( + uint8_t id, + const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); + +/** + * Delete a Tx queue from the adapter. + * A queue value of -1 is used to indicate all + * queues within the device, that have been added to this + * adapter. + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @param eth_dev + * Ethernet device pointer + * + * @param tx_queue_id + * Transmit queue index + * + * @return + * - 0: Success, Queues deleted successfully. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_queue_del_t)( + uint8_t id, + const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); + +/** + * Start the adapter. + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @return + * - 0: Success, Adapter started correctly. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_start_t)(uint8_t id, + const struct rte_eventdev *dev); + +/** + * Stop the adapter. 
+ * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @return + * - 0: Success. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_stop_t)(uint8_t id, + const struct rte_eventdev *dev); + +struct rte_event_eth_tx_adapter_stats; + +/** + * Retrieve statistics for an adapter + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @param [out] stats + * A pointer to structure used to retrieve statistics for an adapter + * + * @return + * - 0: Success, statistics retrieved successfully. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_stats_get_t)( + uint8_t id, + const struct rte_eventdev *dev, + struct rte_event_eth_tx_adapter_stats *stats); + +/** + * Reset statistics for an adapter + * + * @param id + * Adapter identifier + * + * @param dev + * Event device pointer + * + * @return + * - 0: Success, statistics retrieved successfully. + * - <0: Error code on failure. + */ +typedef int (*eventdev_eth_tx_adapter_stats_reset_t)(uint8_t id, + const struct rte_eventdev *dev); + /** Event device operations function pointer table */ struct rte_eventdev_ops { eventdev_info_get_t dev_infos_get; /**< Get device info. */ @@ -815,6 +1010,8 @@ struct rte_eventdev_ops { /**< Link event queues to an event port. */ eventdev_port_unlink_t port_unlink; /**< Unlink event queues from an event port. */ + eventdev_port_unlinks_in_progress_t port_unlinks_in_progress; + /**< Unlinks in progress on an event port. */ eventdev_dequeue_timeout_ticks_t timeout_ticks; /**< Converts ns to *timeout_ticks* value for rte_event_dequeue() */ eventdev_dump_t dump; @@ -862,6 +1059,26 @@ struct rte_eventdev_ops { eventdev_crypto_adapter_stats_reset crypto_adapter_stats_reset; /**< Reset crypto stats */ + eventdev_eth_tx_adapter_caps_get_t eth_tx_adapter_caps_get; + /**< Get ethernet Tx adapter capabilities */ + + eventdev_eth_tx_adapter_create_t eth_tx_adapter_create; + /**< Create adapter callback */ + eventdev_eth_tx_adapter_free_t eth_tx_adapter_free; + /**< Free adapter callback */ + eventdev_eth_tx_adapter_queue_add_t eth_tx_adapter_queue_add; + /**< Add Tx queues to the eth Tx adapter */ + eventdev_eth_tx_adapter_queue_del_t eth_tx_adapter_queue_del; + /**< Delete Tx queues from the eth Tx adapter */ + eventdev_eth_tx_adapter_start_t eth_tx_adapter_start; + /**< Start eth Tx adapter */ + eventdev_eth_tx_adapter_stop_t eth_tx_adapter_stop; + /**< Stop eth Tx adapter */ + eventdev_eth_tx_adapter_stats_get_t eth_tx_adapter_stats_get; + /**< Get eth Tx adapter statistics */ + eventdev_eth_tx_adapter_stats_reset_t eth_tx_adapter_stats_reset; + /**< Reset eth Tx adapter statistics */ + eventdev_selftest dev_selftest; /**< Start eventdev Selftest */ diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map index 12835e9f..d558d7d5 100644 --- a/lib/librte_eventdev/rte_eventdev_version.map +++ b/lib/librte_eventdev/rte_eventdev_version.map @@ -96,6 +96,19 @@ EXPERIMENTAL { rte_event_crypto_adapter_stats_reset; rte_event_crypto_adapter_stop; rte_event_eth_rx_adapter_cb_register; + rte_event_port_unlinks_in_progress; + rte_event_eth_tx_adapter_caps_get; + rte_event_eth_tx_adapter_create; + rte_event_eth_tx_adapter_create_ext; + rte_event_eth_tx_adapter_event_port_get; + rte_event_eth_tx_adapter_free; + rte_event_eth_tx_adapter_queue_add; + rte_event_eth_tx_adapter_queue_del; + rte_event_eth_tx_adapter_service_id_get; + rte_event_eth_tx_adapter_start; + 
rte_event_eth_tx_adapter_stats_get; + rte_event_eth_tx_adapter_stats_reset; + rte_event_eth_tx_adapter_stop; rte_event_timer_adapter_caps_get; rte_event_timer_adapter_create; rte_event_timer_adapter_create_ext; diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c index 4c3469da..fb652a2b 100644 --- a/lib/librte_flow_classify/rte_flow_classify.c +++ b/lib/librte_flow_classify/rte_flow_classify.c @@ -247,8 +247,7 @@ rte_flow_classifier_check_params(struct rte_flow_classifier_params *params) } /* socket */ - if ((params->socket_id < 0) || - (params->socket_id >= RTE_MAX_NUMA_NODES)) { + if (params->socket_id < 0) { RTE_FLOW_CLASSIFY_LOG(ERR, "%s: Incorrect value for parameter socket_id\n", __func__); diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index f7b86c8c..5ddcccd8 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2018 Arm Limited */ #include <string.h> @@ -26,11 +27,14 @@ #include <rte_spinlock.h> #include <rte_ring.h> #include <rte_compat.h> -#include <rte_pause.h> #include "rte_hash.h" #include "rte_cuckoo_hash.h" +#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET) \ + for (CURRENT_BKT = START_BUCKET; \ + CURRENT_BKT != NULL; \ + CURRENT_BKT = CURRENT_BKT->next) TAILQ_HEAD(rte_hash_list, rte_tailq_entry); @@ -63,6 +67,14 @@ rte_hash_find_existing(const char *name) return h; } +static inline struct rte_hash_bucket * +rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt) +{ + while (lst_bkt->next != NULL) + lst_bkt = lst_bkt->next; + return lst_bkt; +} + void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) { h->cmp_jump_table_idx = KEY_CUSTOM; @@ -78,6 +90,36 @@ rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h) return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len); } +/* + * We use higher 16 bits of hash as the signature value stored in table. + * We use the lower bits for the primary bucket + * location. Then we XOR primary bucket location and the signature + * to get the secondary bucket location. This is same as + * proposed in Bin Fan, et al's paper + * "MemC3: Compact and Concurrent MemCache with Dumber Caching and + * Smarter Hashing". The benefit to use + * XOR is that one could derive the alternative bucket location + * by only using the current bucket location and the signature. 
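To make the XOR property described above concrete, here is a small worked example; the numbers are arbitrary and simply mirror the helper functions defined just below:

/* Illustrative numbers only: with 256 buckets (bucket_bitmask = 0xFF) and a
 * 32-bit hash of 0x1A2B0037, the stored 16-bit signature is 0x1A2B and the
 * primary bucket index is 0x37. The alternative bucket index is then
 * (0x37 ^ 0x1A2B) & 0xFF = 0x1C, and applying the same XOR to the
 * alternative index recovers the primary one:
 * (0x1C ^ 0x1A2B) & 0xFF = 0x37.
 * Either bucket index plus the stored signature is therefore enough to
 * locate the other bucket, without keeping a second signature per entry.
 */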
+ */ +static inline uint16_t +get_short_sig(const hash_sig_t hash) +{ + return hash >> 16; +} + +static inline uint32_t +get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash) +{ + return hash & h->bucket_bitmask; +} + +static inline uint32_t +get_alt_bucket_index(const struct rte_hash *h, + uint32_t cur_bkt_idx, uint16_t sig) +{ + return (cur_bkt_idx ^ sig) & h->bucket_bitmask; +} + struct rte_hash * rte_hash_create(const struct rte_hash_parameters *params) { @@ -85,14 +127,22 @@ rte_hash_create(const struct rte_hash_parameters *params) struct rte_tailq_entry *te = NULL; struct rte_hash_list *hash_list; struct rte_ring *r = NULL; + struct rte_ring *r_ext = NULL; char hash_name[RTE_HASH_NAMESIZE]; void *k = NULL; void *buckets = NULL; + void *buckets_ext = NULL; char ring_name[RTE_RING_NAMESIZE]; + char ext_ring_name[RTE_RING_NAMESIZE]; unsigned num_key_slots; unsigned i; - unsigned int hw_trans_mem_support = 0, multi_writer_support = 0; + unsigned int hw_trans_mem_support = 0, use_local_cache = 0; + unsigned int ext_table_support = 0; unsigned int readwrite_concur_support = 0; + unsigned int writer_takes_lock = 0; + unsigned int no_free_on_del = 0; + uint32_t *tbl_chng_cnt = NULL; + unsigned int readwrite_concur_lf_support = 0; rte_hash_function default_hash_func = (rte_hash_function)rte_jhash; @@ -112,20 +162,52 @@ rte_hash_create(const struct rte_hash_parameters *params) return NULL; } + /* Validate correct usage of extra options */ + if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) && + (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) { + rte_errno = EINVAL; + RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or " + "rw concurrency lock free\n"); + return NULL; + } + + if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) && + (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) { + rte_errno = EINVAL; + RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket " + "feature not supported with rw concurrency " + "lock free\n"); + return NULL; + } + /* Check extra flags field to check extra options. */ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) hw_trans_mem_support = 1; - if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) - multi_writer_support = 1; + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) { + use_local_cache = 1; + writer_takes_lock = 1; + } if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) { readwrite_concur_support = 1; - multi_writer_support = 1; + writer_takes_lock = 1; + } + + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE) + ext_table_support = 1; + + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL) + no_free_on_del = 1; + + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) { + readwrite_concur_lf_support = 1; + /* Enable not freeing internal memory/index on delete */ + no_free_on_del = 1; } /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */ - if (multi_writer_support) + if (use_local_cache) /* * Increase number of slots by total number of indices * that can be stored in the lcore caches @@ -145,6 +227,24 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err; } + const uint32_t num_buckets = rte_align32pow2(params->entries) / + RTE_HASH_BUCKET_ENTRIES; + + /* Create ring for extendable buckets. 
*/ + if (ext_table_support) { + snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s", + params->name); + r_ext = rte_ring_create(ext_ring_name, + rte_align32pow2(num_buckets + 1), + params->socket_id, 0); + + if (r_ext == NULL) { + RTE_LOG(ERR, HASH, "ext buckets memory allocation " + "failed\n"); + goto err; + } + } + snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name); rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); @@ -177,19 +277,37 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err_unlock; } - const uint32_t num_buckets = rte_align32pow2(params->entries) - / RTE_HASH_BUCKET_ENTRIES; - buckets = rte_zmalloc_socket(NULL, num_buckets * sizeof(struct rte_hash_bucket), RTE_CACHE_LINE_SIZE, params->socket_id); if (buckets == NULL) { - RTE_LOG(ERR, HASH, "memory allocation failed\n"); + RTE_LOG(ERR, HASH, "buckets memory allocation failed\n"); goto err_unlock; } - const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len; + /* Allocate same number of extendable buckets */ + if (ext_table_support) { + buckets_ext = rte_zmalloc_socket(NULL, + num_buckets * sizeof(struct rte_hash_bucket), + RTE_CACHE_LINE_SIZE, params->socket_id); + if (buckets_ext == NULL) { + RTE_LOG(ERR, HASH, "ext buckets memory allocation " + "failed\n"); + goto err_unlock; + } + /* Populate ext bkt ring. We reserve 0 similar to the + * key-data slot, just in case in future we want to + * use bucket index for the linked list and 0 means NULL + * for next bucket + */ + for (i = 1; i <= num_buckets; i++) + rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i)); + } + + const uint32_t key_entry_size = + RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len, + KEY_ALIGNMENT); const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots; k = rte_zmalloc_socket(NULL, key_tbl_size, @@ -200,6 +318,14 @@ rte_hash_create(const struct rte_hash_parameters *params) goto err_unlock; } + tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (tbl_chng_cnt == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err_unlock; + } + /* * If x86 architecture is used, select appropriate compare function, * which may use x86 intrinsics, otherwise use memcmp @@ -239,7 +365,7 @@ rte_hash_create(const struct rte_hash_parameters *params) h->cmp_jump_table_idx = KEY_OTHER_BYTES; #endif - if (multi_writer_support) { + if (use_local_cache) { h->local_free_slots = rte_zmalloc_socket(NULL, sizeof(struct lcore_cache) * RTE_MAX_LCORE, RTE_CACHE_LINE_SIZE, params->socket_id); @@ -262,27 +388,34 @@ rte_hash_create(const struct rte_hash_parameters *params) h->num_buckets = num_buckets; h->bucket_bitmask = h->num_buckets - 1; h->buckets = buckets; + h->buckets_ext = buckets_ext; + h->free_ext_bkts = r_ext; h->hash_func = (params->hash_func == NULL) ? 
default_hash_func : params->hash_func; h->key_store = k; h->free_slots = r; + h->tbl_chng_cnt = tbl_chng_cnt; + *h->tbl_chng_cnt = 0; h->hw_trans_mem_support = hw_trans_mem_support; - h->multi_writer_support = multi_writer_support; + h->use_local_cache = use_local_cache; h->readwrite_concur_support = readwrite_concur_support; + h->ext_table_support = ext_table_support; + h->writer_takes_lock = writer_takes_lock; + h->no_free_on_del = no_free_on_del; + h->readwrite_concur_lf_support = readwrite_concur_lf_support; #if defined(RTE_ARCH_X86) - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) - h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2; - else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2)) h->sig_cmp_fn = RTE_HASH_COMPARE_SSE; else #endif h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR; - /* Turn on multi-writer only with explicit flag from user and TM - * support. + /* Writer threads need to take the lock when: + * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR + * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled */ - if (h->multi_writer_support) { + if (h->writer_takes_lock) { h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t), RTE_CACHE_LINE_SIZE); if (h->readwrite_lock == NULL) @@ -304,10 +437,13 @@ err_unlock: rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); err: rte_ring_free(r); + rte_ring_free(r_ext); rte_free(te); rte_free(h); rte_free(buckets); + rte_free(buckets_ext); rte_free(k); + rte_free(tbl_chng_cnt); return NULL; } @@ -339,13 +475,16 @@ rte_hash_free(struct rte_hash *h) rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - if (h->multi_writer_support) { + if (h->use_local_cache) rte_free(h->local_free_slots); + if (h->writer_takes_lock) rte_free(h->readwrite_lock); - } rte_ring_free(h->free_slots); + rte_ring_free(h->free_ext_bkts); rte_free(h->key_store); rte_free(h->buckets); + rte_free(h->buckets_ext); + rte_free(h->tbl_chng_cnt); rte_free(h); rte_free(te); } @@ -357,18 +496,6 @@ rte_hash_hash(const struct rte_hash *h, const void *key) return h->hash_func(key, h->key_len, h->hash_func_init_val); } -/* Calc the secondary hash value from the primary hash value of a given key */ -static inline hash_sig_t -rte_hash_secondary_hash(const hash_sig_t primary_hash) -{ - static const unsigned all_bits_shift = 12; - static const unsigned alt_bits_xor = 0x5bd1e995; - - uint32_t tag = primary_hash >> all_bits_shift; - - return primary_hash ^ ((tag + 1) * alt_bits_xor); -} - int32_t rte_hash_count(const struct rte_hash *h) { @@ -378,7 +505,7 @@ rte_hash_count(const struct rte_hash *h) if (h == NULL) return -EINVAL; - if (h->multi_writer_support) { + if (h->use_local_cache) { tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1); for (i = 0; i < RTE_MAX_LCORE; i++) @@ -397,13 +524,12 @@ rte_hash_count(const struct rte_hash *h) static inline void __hash_rw_writer_lock(const struct rte_hash *h) { - if (h->multi_writer_support && h->hw_trans_mem_support) + if (h->writer_takes_lock && h->hw_trans_mem_support) rte_rwlock_write_lock_tm(h->readwrite_lock); - else if (h->multi_writer_support) + else if (h->writer_takes_lock) rte_rwlock_write_lock(h->readwrite_lock); } - static inline void __hash_rw_reader_lock(const struct rte_hash *h) { @@ -416,9 +542,9 @@ __hash_rw_reader_lock(const struct rte_hash *h) static inline void __hash_rw_writer_unlock(const struct rte_hash *h) { - if (h->multi_writer_support && h->hw_trans_mem_support) + if (h->writer_takes_lock && h->hw_trans_mem_support) rte_rwlock_write_unlock_tm(h->readwrite_lock); - 
else if (h->multi_writer_support) + else if (h->writer_takes_lock) rte_rwlock_write_unlock(h->readwrite_lock); } @@ -443,13 +569,22 @@ rte_hash_reset(struct rte_hash *h) __hash_rw_writer_lock(h); memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket)); memset(h->key_store, 0, h->key_entry_size * (h->entries + 1)); + *h->tbl_chng_cnt = 0; /* clear the free ring */ while (rte_ring_dequeue(h->free_slots, &ptr) == 0) - rte_pause(); + continue; + + /* clear free extendable bucket ring and memory */ + if (h->ext_table_support) { + memset(h->buckets_ext, 0, h->num_buckets * + sizeof(struct rte_hash_bucket)); + while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0) + continue; + } /* Repopulate the free slots ring. Entry zero is reserved for key misses */ - if (h->multi_writer_support) + if (h->use_local_cache) tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1); else @@ -458,7 +593,14 @@ rte_hash_reset(struct rte_hash *h) for (i = 1; i < tot_ring_cnt + 1; i++) rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i)); - if (h->multi_writer_support) { + /* Repopulate the free ext bkt ring. */ + if (h->ext_table_support) { + for (i = 1; i <= h->num_buckets; i++) + rte_ring_sp_enqueue(h->free_ext_bkts, + (void *)((uintptr_t) i)); + } + + if (h->use_local_cache) { /* Reset local caches per lcore */ for (i = 0; i < RTE_MAX_LCORE; i++) h->local_free_slots[i].len = 0; @@ -476,29 +618,35 @@ enqueue_slot_back(const struct rte_hash *h, struct lcore_cache *cached_free_slots, void *slot_id) { - if (h->multi_writer_support) { + if (h->use_local_cache) { cached_free_slots->objs[cached_free_slots->len] = slot_id; cached_free_slots->len++; } else rte_ring_sp_enqueue(h->free_slots, slot_id); } -/* Search a key from bucket and update its data */ +/* Search a key from bucket and update its data. + * Writer holds the lock before calling this. + */ static inline int32_t search_and_update(const struct rte_hash *h, void *data, const void *key, - struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash) + struct rte_hash_bucket *bkt, uint16_t sig) { int i; struct rte_hash_key *k, *keys = h->key_store; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == sig && - bkt->sig_alt[i] == alt_hash) { + if (bkt->sig_current[i] == sig) { k = (struct rte_hash_key *) ((char *)keys + bkt->key_idx[i] * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { - /* Update data */ - k->pdata = data; + /* 'pdata' acts as the synchronization point + * when an existing hash entry is updated. + * Key is not updated in this case. + */ + __atomic_store_n(&k->pdata, + data, + __ATOMIC_RELEASE); /* * Return index where key is stored, * subtracting the first dummy index @@ -520,28 +668,31 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, struct rte_hash_bucket *prim_bkt, struct rte_hash_bucket *sec_bkt, const struct rte_hash_key *key, void *data, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + uint16_t sig, uint32_t new_idx, int32_t *ret_val) { unsigned int i; - struct rte_hash_bucket *cur_bkt = prim_bkt; + struct rte_hash_bucket *cur_bkt; int32_t ret; __hash_rw_writer_lock(h); /* Check if key was inserted after last check but before this * protected region in case of inserting duplicated keys. 
*/ - ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, prim_bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; return 1; } - ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig); - if (ret != -1) { - __hash_rw_writer_unlock(h); - *ret_val = ret; - return 1; + + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { + ret = search_and_update(h, data, key, cur_bkt, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; + } } /* Insert new entry if there is room in the primary @@ -551,8 +702,15 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h, /* Check if slot is available */ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) { prim_bkt->sig_current[i] = sig; - prim_bkt->sig_alt[i] = alt_hash; - prim_bkt->key_idx[i] = new_idx; + /* Key can be of arbitrary length, so it is + * not possible to store it atomically. + * Hence the new key element's memory stores + * (key as well as data) should be complete + * before it is referenced. + */ + __atomic_store_n(&prim_bkt->key_idx[i], + new_idx, + __ATOMIC_RELEASE); break; } } @@ -576,11 +734,11 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, struct rte_hash_bucket *alt_bkt, const struct rte_hash_key *key, void *data, struct queue_node *leaf, uint32_t leaf_slot, - hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx, + uint16_t sig, uint32_t new_idx, int32_t *ret_val) { uint32_t prev_alt_bkt_idx; - struct rte_hash_bucket *cur_bkt = bkt; + struct rte_hash_bucket *cur_bkt; struct queue_node *prev_node, *curr_node = leaf; struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt; uint32_t prev_slot, curr_slot = leaf_slot; @@ -597,18 +755,20 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, /* Check if key was inserted after last check but before this * protected region. */ - ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, bkt, sig); if (ret != -1) { __hash_rw_writer_unlock(h); *ret_val = ret; return 1; } - ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig); - if (ret != -1) { - __hash_rw_writer_unlock(h); - *ret_val = ret; - return 1; + FOR_EACH_BUCKET(cur_bkt, alt_bkt) { + ret = search_and_update(h, data, key, cur_bkt, sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + *ret_val = ret; + return 1; + } } while (likely(curr_node->prev != NULL)) { @@ -616,36 +776,73 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h, prev_bkt = prev_node->bkt; prev_slot = curr_node->prev_slot; - prev_alt_bkt_idx = - prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask; + prev_alt_bkt_idx = get_alt_bucket_index(h, + prev_node->cur_bkt_idx, + prev_bkt->sig_current[prev_slot]); if (unlikely(&h->buckets[prev_alt_bkt_idx] != curr_bkt)) { /* revert it to empty, otherwise duplicated keys */ - curr_bkt->key_idx[curr_slot] = EMPTY_SLOT; + __atomic_store_n(&curr_bkt->key_idx[curr_slot], + EMPTY_SLOT, + __ATOMIC_RELEASE); __hash_rw_writer_unlock(h); return -1; } + if (h->readwrite_concur_lf_support) { + /* Inform the previous move. The current move need + * not be informed now as the current bucket entry + * is present in both primary and secondary. + * Since there is one writer, load acquires on + * tbl_chng_cnt are not required. + */ + __atomic_store_n(h->tbl_chng_cnt, + *h->tbl_chng_cnt + 1, + __ATOMIC_RELEASE); + /* The stores to sig_alt and sig_current should not + * move above the store to tbl_chng_cnt. 
+ */ + __atomic_thread_fence(__ATOMIC_RELEASE); + } + /* Need to swap current/alt sig to allow later * Cuckoo insert to move elements back to its * primary bucket if available */ - curr_bkt->sig_alt[curr_slot] = - prev_bkt->sig_current[prev_slot]; curr_bkt->sig_current[curr_slot] = - prev_bkt->sig_alt[prev_slot]; - curr_bkt->key_idx[curr_slot] = - prev_bkt->key_idx[prev_slot]; + prev_bkt->sig_current[prev_slot]; + /* Release the updated bucket entry */ + __atomic_store_n(&curr_bkt->key_idx[curr_slot], + prev_bkt->key_idx[prev_slot], + __ATOMIC_RELEASE); curr_slot = prev_slot; curr_node = prev_node; curr_bkt = curr_node->bkt; } + if (h->readwrite_concur_lf_support) { + /* Inform the previous move. The current move need + * not be informed now as the current bucket entry + * is present in both primary and secondary. + * Since there is one writer, load acquires on + * tbl_chng_cnt are not required. + */ + __atomic_store_n(h->tbl_chng_cnt, + *h->tbl_chng_cnt + 1, + __ATOMIC_RELEASE); + /* The stores to sig_alt and sig_current should not + * move above the store to tbl_chng_cnt. + */ + __atomic_thread_fence(__ATOMIC_RELEASE); + } + curr_bkt->sig_current[curr_slot] = sig; - curr_bkt->sig_alt[curr_slot] = alt_hash; - curr_bkt->key_idx[curr_slot] = new_idx; + /* Release the new bucket entry */ + __atomic_store_n(&curr_bkt->key_idx[curr_slot], + new_idx, + __ATOMIC_RELEASE); __hash_rw_writer_unlock(h); @@ -662,39 +859,44 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h, struct rte_hash_bucket *bkt, struct rte_hash_bucket *sec_bkt, const struct rte_hash_key *key, void *data, - hash_sig_t sig, hash_sig_t alt_hash, + uint16_t sig, uint32_t bucket_idx, uint32_t new_idx, int32_t *ret_val) { unsigned int i; struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN]; struct queue_node *tail, *head; struct rte_hash_bucket *curr_bkt, *alt_bkt; + uint32_t cur_idx, alt_idx; tail = queue; head = queue + 1; tail->bkt = bkt; tail->prev = NULL; tail->prev_slot = -1; + tail->cur_bkt_idx = bucket_idx; /* Cuckoo bfs Search */ while (likely(tail != head && head < queue + RTE_HASH_BFS_QUEUE_MAX_LEN - RTE_HASH_BUCKET_ENTRIES)) { curr_bkt = tail->bkt; + cur_idx = tail->cur_bkt_idx; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { if (curr_bkt->key_idx[i] == EMPTY_SLOT) { int32_t ret = rte_hash_cuckoo_move_insert_mw(h, bkt, sec_bkt, key, data, - tail, i, sig, alt_hash, + tail, i, sig, new_idx, ret_val); if (likely(ret != -1)) return ret; } /* Enqueue new node and keep prev node info */ - alt_bkt = &(h->buckets[curr_bkt->sig_alt[i] - & h->bucket_bitmask]); + alt_idx = get_alt_bucket_index(h, cur_idx, + curr_bkt->sig_current[i]); + alt_bkt = &(h->buckets[alt_idx]); head->bkt = alt_bkt; + head->cur_bkt_idx = alt_idx; head->prev = tail; head->prev_slot = i; head++; @@ -709,45 +911,50 @@ static inline int32_t __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig, void *data) { - hash_sig_t alt_hash; + uint16_t short_sig; uint32_t prim_bucket_idx, sec_bucket_idx; - struct rte_hash_bucket *prim_bkt, *sec_bkt; + struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt; struct rte_hash_key *new_k, *keys = h->key_store; void *slot_id = NULL; - uint32_t new_idx; + void *ext_bkt_id = NULL; + uint32_t new_idx, bkt_id; int ret; unsigned n_slots; unsigned lcore_id; + unsigned int i; struct lcore_cache *cached_free_slots = NULL; int32_t ret_val; + struct rte_hash_bucket *last; - prim_bucket_idx = sig & h->bucket_bitmask; + short_sig = get_short_sig(sig); + prim_bucket_idx = get_prim_bucket_index(h, sig); + 
sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig); prim_bkt = &h->buckets[prim_bucket_idx]; - rte_prefetch0(prim_bkt); - - alt_hash = rte_hash_secondary_hash(sig); - sec_bucket_idx = alt_hash & h->bucket_bitmask; sec_bkt = &h->buckets[sec_bucket_idx]; + rte_prefetch0(prim_bkt); rte_prefetch0(sec_bkt); /* Check if key is already inserted in primary location */ __hash_rw_writer_lock(h); - ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash); + ret = search_and_update(h, data, key, prim_bkt, short_sig); if (ret != -1) { __hash_rw_writer_unlock(h); return ret; } /* Check if key is already inserted in secondary location */ - ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig); - if (ret != -1) { - __hash_rw_writer_unlock(h); - return ret; + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { + ret = search_and_update(h, data, key, cur_bkt, short_sig); + if (ret != -1) { + __hash_rw_writer_unlock(h); + return ret; + } } + __hash_rw_writer_unlock(h); /* Did not find a match, so get a new slot for storing the new key */ - if (h->multi_writer_support) { + if (h->use_local_cache) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; /* Try to get a free slot from the local cache */ @@ -776,12 +983,19 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, new_idx = (uint32_t)((uintptr_t) slot_id); /* Copy key */ rte_memcpy(new_k->key, key, h->key_len); - new_k->pdata = data; - + /* Key can be of arbitrary length, so it is not possible to store + * it atomically. Hence the new key element's memory stores + * (key as well as data) should be complete before it is referenced. + * 'pdata' acts as the synchronization point when an existing hash + * entry is updated. + */ + __atomic_store_n(&new_k->pdata, + data, + __ATOMIC_RELEASE); /* Find an empty slot and insert */ ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data, - sig, alt_hash, new_idx, &ret_val); + short_sig, new_idx, &ret_val); if (ret == 0) return new_idx - 1; else if (ret == 1) { @@ -791,7 +1005,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Primary bucket full, need to make space for new entry */ ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data, - sig, alt_hash, new_idx, &ret_val); + short_sig, prim_bucket_idx, new_idx, &ret_val); if (ret == 0) return new_idx - 1; else if (ret == 1) { @@ -801,17 +1015,75 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, /* Also search secondary bucket to get better occupancy */ ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data, - alt_hash, sig, new_idx, &ret_val); + short_sig, sec_bucket_idx, new_idx, &ret_val); if (ret == 0) return new_idx - 1; else if (ret == 1) { enqueue_slot_back(h, cached_free_slots, slot_id); return ret_val; - } else { + } + + /* if ext table not enabled, we failed the insertion */ + if (!h->ext_table_support) { enqueue_slot_back(h, cached_free_slots, slot_id); return ret; } + + /* Now we need to go through the extendable bucket. Protection is needed + * to protect all extendable bucket processes. 
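For reference, the extendable-bucket path that follows is only reachable when the table was created with the new extra flag; a minimal creation sketch is shown below (the table name, sizes and socket id are illustrative placeholders):

#include <rte_hash.h>
#include <rte_jhash.h>

/* Illustrative only: a table created this way keeps accepting keys after the
 * cuckoo buckets fill up, by linking extendable buckets as implemented below.
 */
static struct rte_hash *
create_ext_table(void)
{
	struct rte_hash_parameters params = {
		.name = "example_ext_hash",	/* placeholder name */
		.entries = 1024,
		.key_len = sizeof(uint32_t),
		.hash_func = rte_jhash,
		.hash_func_init_val = 0,
		.socket_id = 0,			/* placeholder socket */
		.extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE,
	};
	return rte_hash_create(&params);
}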
+ */ + __hash_rw_writer_lock(h); + /* We check for duplicates again since could be inserted before the lock */ + ret = search_and_update(h, data, key, prim_bkt, short_sig); + if (ret != -1) { + enqueue_slot_back(h, cached_free_slots, slot_id); + goto failure; + } + + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { + ret = search_and_update(h, data, key, cur_bkt, short_sig); + if (ret != -1) { + enqueue_slot_back(h, cached_free_slots, slot_id); + goto failure; + } + } + + /* Search sec and ext buckets to find an empty entry to insert. */ + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Check if slot is available */ + if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) { + cur_bkt->sig_current[i] = short_sig; + cur_bkt->key_idx[i] = new_idx; + __hash_rw_writer_unlock(h); + return new_idx - 1; + } + } + } + + /* Failed to get an empty entry from extendable buckets. Link a new + * extendable bucket. We first get a free bucket from ring. + */ + if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) { + ret = -ENOSPC; + goto failure; + } + + bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1; + /* Use the first location of the new bucket */ + (h->buckets_ext[bkt_id]).sig_current[0] = short_sig; + (h->buckets_ext[bkt_id]).key_idx[0] = new_idx; + /* Link the new bucket to sec bucket linked list */ + last = rte_hash_get_last_bkt(sec_bkt); + last->next = &h->buckets_ext[bkt_id]; + __hash_rw_writer_unlock(h); + return new_idx - 1; + +failure: + __hash_rw_writer_unlock(h); + return ret; + } int32_t @@ -859,25 +1131,31 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) /* Search one bucket to find the match key */ static inline int32_t -search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig, +search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig, void **data, const struct rte_hash_bucket *bkt) { int i; + uint32_t key_idx; + void *pdata; struct rte_hash_key *k, *keys = h->key_store; for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == sig && - bkt->key_idx[i] != EMPTY_SLOT) { + key_idx = __atomic_load_n(&bkt->key_idx[i], + __ATOMIC_ACQUIRE); + if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + - bkt->key_idx[i] * h->key_entry_size); + key_idx * h->key_entry_size); + pdata = __atomic_load_n(&k->pdata, + __ATOMIC_ACQUIRE); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { if (data != NULL) - *data = k->pdata; + *data = pdata; /* * Return index where key is stored, * subtracting the first dummy index */ - return bkt->key_idx[i] - 1; + return key_idx - 1; } } } @@ -888,34 +1166,64 @@ static inline int32_t __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig, void **data) { - uint32_t bucket_idx; - hash_sig_t alt_hash; - struct rte_hash_bucket *bkt; + uint32_t prim_bucket_idx, sec_bucket_idx; + struct rte_hash_bucket *bkt, *cur_bkt; + uint32_t cnt_b, cnt_a; int ret; + uint16_t short_sig; - bucket_idx = sig & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + short_sig = get_short_sig(sig); + prim_bucket_idx = get_prim_bucket_index(h, sig); + sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig); __hash_rw_reader_lock(h); - /* Check if key is in primary location */ - ret = search_one_bucket(h, key, sig, data, bkt); - if (ret != -1) { - __hash_rw_reader_unlock(h); - return ret; - } - /* Calculate secondary hash */ - alt_hash = rte_hash_secondary_hash(sig); - bucket_idx = alt_hash & 
h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + do { + /* Load the table change counter before the lookup + * starts. Acquire semantics will make sure that + * loads in search_one_bucket are not hoisted. + */ + cnt_b = __atomic_load_n(h->tbl_chng_cnt, + __ATOMIC_ACQUIRE); + + /* Check if key is in primary location */ + bkt = &h->buckets[prim_bucket_idx]; + ret = search_one_bucket(h, key, short_sig, data, bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; + } + /* Calculate secondary hash */ + bkt = &h->buckets[sec_bucket_idx]; + + /* Check if key is in secondary location */ + FOR_EACH_BUCKET(cur_bkt, bkt) { + ret = search_one_bucket(h, key, short_sig, + data, cur_bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; + } + } + + /* The loads of sig_current in search_one_bucket + * should not move below the load from tbl_chng_cnt. + */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + /* Re-read the table change counter to check if the + * table has changed during search. If yes, re-do + * the search. + * This load should not get hoisted. The load + * acquires on cnt_b, key index in primary bucket + * and key index in secondary bucket will make sure + * that it does not get hoisted. + */ + cnt_a = __atomic_load_n(h->tbl_chng_cnt, + __ATOMIC_ACQUIRE); + } while (cnt_b != cnt_a); - /* Check if key is in secondary location */ - ret = search_one_bucket(h, key, alt_hash, data, bkt); - if (ret != -1) { - __hash_rw_reader_unlock(h); - return ret; - } __hash_rw_reader_unlock(h); + return -ENOENT; } @@ -955,9 +1263,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) unsigned lcore_id, n_slots; struct lcore_cache *cached_free_slots; - bkt->sig_current[i] = NULL_SIGNATURE; - bkt->sig_alt[i] = NULL_SIGNATURE; - if (h->multi_writer_support) { + if (h->use_local_cache) { lcore_id = rte_lcore_id(); cached_free_slots = &h->local_free_slots[lcore_id]; /* Cache full, need to free it. */ @@ -978,31 +1284,67 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) } } -/* Search one bucket and remove the matched key */ +/* Compact the linked list by moving key from last entry in linked list to the + * empty slot. + */ +static inline void +__rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) { + int i; + struct rte_hash_bucket *last_bkt; + + if (!cur_bkt->next) + return; + + last_bkt = rte_hash_get_last_bkt(cur_bkt); + + for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) { + if (last_bkt->key_idx[i] != EMPTY_SLOT) { + cur_bkt->key_idx[pos] = last_bkt->key_idx[i]; + cur_bkt->sig_current[pos] = last_bkt->sig_current[i]; + last_bkt->sig_current[i] = NULL_SIGNATURE; + last_bkt->key_idx[i] = EMPTY_SLOT; + return; + } + } +} + +/* Search one bucket and remove the matched key. + * Writer is expected to hold the lock while calling this + * function. 
+ */ static inline int32_t search_and_remove(const struct rte_hash *h, const void *key, - struct rte_hash_bucket *bkt, hash_sig_t sig) + struct rte_hash_bucket *bkt, uint16_t sig, int *pos) { struct rte_hash_key *k, *keys = h->key_store; unsigned int i; - int32_t ret; + uint32_t key_idx; - /* Check if key is in primary location */ + /* Check if key is in bucket */ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { - if (bkt->sig_current[i] == sig && - bkt->key_idx[i] != EMPTY_SLOT) { + key_idx = __atomic_load_n(&bkt->key_idx[i], + __ATOMIC_ACQUIRE); + if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) { k = (struct rte_hash_key *) ((char *)keys + - bkt->key_idx[i] * h->key_entry_size); + key_idx * h->key_entry_size); if (rte_hash_cmp_eq(key, k->key, h) == 0) { - remove_entry(h, bkt, i); + bkt->sig_current[i] = NULL_SIGNATURE; + /* Free the key store index if + * no_free_on_del is disabled. + */ + if (!h->no_free_on_del) + remove_entry(h, bkt, i); + + __atomic_store_n(&bkt->key_idx[i], + EMPTY_SLOT, + __ATOMIC_RELEASE); + *pos = i; /* * Return index where key is stored, * subtracting the first dummy index */ - ret = bkt->key_idx[i] - 1; - bkt->key_idx[i] = EMPTY_SLOT; - return ret; + return key_idx - 1; } } } @@ -1013,36 +1355,68 @@ static inline int32_t __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig) { - uint32_t bucket_idx; - hash_sig_t alt_hash; - struct rte_hash_bucket *bkt; - int32_t ret; - - bucket_idx = sig & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + uint32_t prim_bucket_idx, sec_bucket_idx; + struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt; + struct rte_hash_bucket *cur_bkt; + int pos; + int32_t ret, i; + uint16_t short_sig; + + short_sig = get_short_sig(sig); + prim_bucket_idx = get_prim_bucket_index(h, sig); + sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig); + prim_bkt = &h->buckets[prim_bucket_idx]; __hash_rw_writer_lock(h); /* look for key in primary bucket */ - ret = search_and_remove(h, key, bkt, sig); + ret = search_and_remove(h, key, prim_bkt, short_sig, &pos); if (ret != -1) { - __hash_rw_writer_unlock(h); - return ret; + __rte_hash_compact_ll(prim_bkt, pos); + last_bkt = prim_bkt->next; + prev_bkt = prim_bkt; + goto return_bkt; } /* Calculate secondary hash */ - alt_hash = rte_hash_secondary_hash(sig); - bucket_idx = alt_hash & h->bucket_bitmask; - bkt = &h->buckets[bucket_idx]; + sec_bkt = &h->buckets[sec_bucket_idx]; - /* look for key in secondary bucket */ - ret = search_and_remove(h, key, bkt, alt_hash); - if (ret != -1) { + FOR_EACH_BUCKET(cur_bkt, sec_bkt) { + ret = search_and_remove(h, key, cur_bkt, short_sig, &pos); + if (ret != -1) { + __rte_hash_compact_ll(cur_bkt, pos); + last_bkt = sec_bkt->next; + prev_bkt = sec_bkt; + goto return_bkt; + } + } + + __hash_rw_writer_unlock(h); + return -ENOENT; + +/* Search last bucket to see if empty to be recycled */ +return_bkt: + if (!last_bkt) { __hash_rw_writer_unlock(h); return ret; } + while (last_bkt->next) { + prev_bkt = last_bkt; + last_bkt = last_bkt->next; + } + + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (last_bkt->key_idx[i] != EMPTY_SLOT) + break; + } + /* found empty bucket and recycle */ + if (i == RTE_HASH_BUCKET_ENTRIES) { + prev_bkt->next = last_bkt->next = NULL; + uint32_t index = last_bkt - h->buckets_ext + 1; + rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index); + } __hash_rw_writer_unlock(h); - return -ENOENT; + return ret; } int32_t @@ -1080,59 +1454,76 @@ 
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, return 0; } +int __rte_experimental +rte_hash_free_key_with_position(const struct rte_hash *h, + const int32_t position) +{ + RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL); + + unsigned int lcore_id, n_slots; + struct lcore_cache *cached_free_slots; + const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES; + + /* Out of bounds */ + if (position >= total_entries) + return -EINVAL; + + if (h->use_local_cache) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Cache full, need to free it. */ + if (cached_free_slots->len == LCORE_CACHE_SIZE) { + /* Need to enqueue the free slots in global ring. */ + n_slots = rte_ring_mp_enqueue_burst(h->free_slots, + cached_free_slots->objs, + LCORE_CACHE_SIZE, NULL); + cached_free_slots->len -= n_slots; + } + /* Put index of new free slot in cache. */ + cached_free_slots->objs[cached_free_slots->len] = + (void *)((uintptr_t)position); + cached_free_slots->len++; + } else { + rte_ring_sp_enqueue(h->free_slots, + (void *)((uintptr_t)position)); + } + + return 0; +} + static inline void compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, const struct rte_hash_bucket *prim_bkt, const struct rte_hash_bucket *sec_bkt, - hash_sig_t prim_hash, hash_sig_t sec_hash, + uint16_t sig, enum rte_hash_sig_compare_function sig_cmp_fn) { unsigned int i; + /* For match mask the first bit of every two bits indicates the match */ switch (sig_cmp_fn) { -#ifdef RTE_MACHINE_CPUFLAG_AVX2 - case RTE_HASH_COMPARE_AVX2: - *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( - _mm256_load_si256( - (__m256i const *)prim_bkt->sig_current), - _mm256_set1_epi32(prim_hash))); - *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32( - _mm256_load_si256( - (__m256i const *)sec_bkt->sig_current), - _mm256_set1_epi32(sec_hash))); - break; -#endif #ifdef RTE_MACHINE_CPUFLAG_SSE2 case RTE_HASH_COMPARE_SSE: - /* Compare the first 4 signatures in the bucket */ - *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + /* Compare all signatures in the bucket */ + *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( _mm_load_si128( (__m128i const *)prim_bkt->sig_current), - _mm_set1_epi32(prim_hash))); - *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( - _mm_load_si128( - (__m128i const *)&prim_bkt->sig_current[4]), - _mm_set1_epi32(prim_hash)))) << 4; - /* Compare the first 4 signatures in the bucket */ - *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16( + _mm_set1_epi16(sig))); + /* Compare all signatures in the bucket */ + *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16( _mm_load_si128( (__m128i const *)sec_bkt->sig_current), - _mm_set1_epi32(sec_hash))); - *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16( - _mm_load_si128( - (__m128i const *)&sec_bkt->sig_current[4]), - _mm_set1_epi32(sec_hash)))) << 4; + _mm_set1_epi16(sig))); break; #endif default: for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { *prim_hash_matches |= - ((prim_hash == prim_bkt->sig_current[i]) << i); + ((sig == prim_bkt->sig_current[i]) << (i << 1)); *sec_hash_matches |= - ((sec_hash == sec_bkt->sig_current[i]) << i); + ((sig == sec_bkt->sig_current[i]) << (i << 1)); } } - } #define PREFETCH_OFFSET 4 @@ -1143,12 +1534,18 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, { uint64_t hits = 0; int32_t i; + int32_t ret; uint32_t 
prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; - uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX]; const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + struct rte_hash_bucket *cur_bkt, *next_bkt; + void *pdata[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t cnt_b, cnt_a; /* Prefetch first keys */ for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) @@ -1162,10 +1559,13 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, rte_prefetch0(keys[i + PREFETCH_OFFSET]); prim_hash[i] = rte_hash_hash(h, keys[i]); - sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; - secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; + sig[i] = get_short_sig(prim_hash[i]); + prim_index[i] = get_prim_bucket_index(h, prim_hash[i]); + sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]); + + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; rte_prefetch0(primary_bkt[i]); rte_prefetch0(secondary_bkt[i]); @@ -1174,96 +1574,178 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, /* Calculate and prefetch rest of the buckets */ for (; i < num_keys; i++) { prim_hash[i] = rte_hash_hash(h, keys[i]); - sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]); - primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask]; - secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask]; + sig[i] = get_short_sig(prim_hash[i]); + prim_index[i] = get_prim_bucket_index(h, prim_hash[i]); + sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]); + + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; rte_prefetch0(primary_bkt[i]); rte_prefetch0(secondary_bkt[i]); } __hash_rw_reader_lock(h); - /* Compare signatures and prefetch key slot of first hit */ - for (i = 0; i < num_keys; i++) { - compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + do { + /* Load the table change counter before the lookup + * starts. Acquire semantics will make sure that + * loads in compare_signatures are not hoisted. 
+ */ + cnt_b = __atomic_load_n(h->tbl_chng_cnt, + __ATOMIC_ACQUIRE); + + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], primary_bkt[i], secondary_bkt[i], - prim_hash[i], sec_hash[i], h->sig_cmp_fn); - - if (prim_hitmask[i]) { - uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]); - uint32_t key_idx = primary_bkt[i]->key_idx[first_hit]; - const struct rte_hash_key *key_slot = - (const struct rte_hash_key *)( - (const char *)h->key_store + - key_idx * h->key_entry_size); - rte_prefetch0(key_slot); - continue; - } + sig[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = + __builtin_ctzl(prim_hitmask[i]) + >> 1; + uint32_t key_idx = + primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } - if (sec_hitmask[i]) { - uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]); - uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit]; - const struct rte_hash_key *key_slot = - (const struct rte_hash_key *)( - (const char *)h->key_store + - key_idx * h->key_entry_size); - rte_prefetch0(key_slot); + if (sec_hitmask[i]) { + uint32_t first_hit = + __builtin_ctzl(sec_hitmask[i]) + >> 1; + uint32_t key_idx = + secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } } - } - /* Compare keys, first hits in primary first */ - for (i = 0; i < num_keys; i++) { - positions[i] = -ENOENT; - while (prim_hitmask[i]) { - uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]); - - uint32_t key_idx = primary_bkt[i]->key_idx[hit_index]; - const struct rte_hash_key *key_slot = - (const struct rte_hash_key *)( - (const char *)h->key_store + - key_idx * h->key_entry_size); - /* - * If key index is 0, do not compare key, - * as it is checking the dummy slot - */ - if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { - if (data != NULL) - data[i] = key_slot->pdata; + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = + __builtin_ctzl(prim_hitmask[i]) + >> 1; + uint32_t key_idx = + __atomic_load_n( + &primary_bkt[i]->key_idx[hit_index], + __ATOMIC_ACQUIRE); + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + + if (key_idx != EMPTY_SLOT) + pdata[i] = __atomic_load_n( + &key_slot->pdata, + __ATOMIC_ACQUIRE); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & + !rte_hash_cmp_eq( + key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = pdata[i]; + + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(3ULL << (hit_index << 1)); + } - hits |= 1ULL << i; - positions[i] = key_idx - 1; - goto next_key; + while (sec_hitmask[i]) { + uint32_t hit_index = + __builtin_ctzl(sec_hitmask[i]) + >> 1; + uint32_t key_idx = + __atomic_load_n( + &secondary_bkt[i]->key_idx[hit_index], + __ATOMIC_ACQUIRE); + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + + if (key_idx != EMPTY_SLOT) + pdata[i] = __atomic_load_n( + &key_slot->pdata, + 
__ATOMIC_ACQUIRE); + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + + if (!!key_idx & + !rte_hash_cmp_eq( + key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = pdata[i]; + + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + sec_hitmask[i] &= ~(3ULL << (hit_index << 1)); } - prim_hitmask[i] &= ~(1 << (hit_index)); +next_key: + continue; } - while (sec_hitmask[i]) { - uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]); - - uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index]; - const struct rte_hash_key *key_slot = - (const struct rte_hash_key *)( - (const char *)h->key_store + - key_idx * h->key_entry_size); - /* - * If key index is 0, do not compare key, - * as it is checking the dummy slot - */ - - if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) { - if (data != NULL) - data[i] = key_slot->pdata; + /* The loads of sig_current in compare_signatures + * should not move below the load from tbl_chng_cnt. + */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + /* Re-read the table change counter to check if the + * table has changed during search. If yes, re-do + * the search. + * This load should not get hoisted. The load + * acquires on cnt_b, primary key index and secondary + * key index will make sure that it does not get + * hoisted. + */ + cnt_a = __atomic_load_n(h->tbl_chng_cnt, + __ATOMIC_ACQUIRE); + } while (cnt_b != cnt_a); + + /* all found, do not need to go through ext bkt */ + if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) { + if (hit_mask != NULL) + *hit_mask = hits; + __hash_rw_reader_unlock(h); + return; + } + /* need to check ext buckets for match */ + for (i = 0; i < num_keys; i++) { + if ((hits & (1ULL << i)) != 0) + continue; + next_bkt = secondary_bkt[i]->next; + FOR_EACH_BUCKET(cur_bkt, next_bkt) { + if (data != NULL) + ret = search_one_bucket(h, keys[i], + sig[i], &data[i], cur_bkt); + else + ret = search_one_bucket(h, keys[i], + sig[i], NULL, cur_bkt); + if (ret != -1) { + positions[i] = ret; hits |= 1ULL << i; - positions[i] = key_idx - 1; - goto next_key; + break; } - sec_hitmask[i] &= ~(1 << (hit_index)); } - -next_key: - continue; } __hash_rw_reader_unlock(h); @@ -1308,27 +1790,30 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL); - const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES; - /* Out of bounds */ - if (*next >= total_entries) - return -ENOENT; + const uint32_t total_entries_main = h->num_buckets * + RTE_HASH_BUCKET_ENTRIES; + const uint32_t total_entries = total_entries_main << 1; + + /* Out of bounds of all buckets (both main table and ext table) */ + if (*next >= total_entries_main) + goto extend_table; /* Calculate bucket and index of current iterator */ bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; idx = *next % RTE_HASH_BUCKET_ENTRIES; /* If current position is empty, go to the next one */ - while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) { + while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx], + __ATOMIC_ACQUIRE)) == EMPTY_SLOT) { (*next)++; /* End of table */ - if (*next == total_entries) - return -ENOENT; + if (*next == total_entries_main) + goto extend_table; bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; idx = *next % RTE_HASH_BUCKET_ENTRIES; } + __hash_rw_reader_lock(h); - /* Get position of entry in key table */ - position = h->buckets[bucket_idx].key_idx[idx]; next_key = (struct rte_hash_key *) ((char 
*)h->key_store + position * h->key_entry_size); /* Return key and data */ @@ -1341,4 +1826,34 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32 (*next)++; return position - 1; + +/* Begin to iterate extendable buckets */ +extend_table: + /* Out of total bound or if ext bucket feature is not enabled */ + if (*next >= total_entries || !h->ext_table_support) + return -ENOENT; + + bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES; + idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES; + + while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) { + (*next)++; + if (*next == total_entries) + return -ENOENT; + bucket_idx = (*next - total_entries_main) / + RTE_HASH_BUCKET_ENTRIES; + idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES; + } + __hash_rw_reader_lock(h); + next_key = (struct rte_hash_key *) ((char *)h->key_store + + position * h->key_entry_size); + /* Return key and data */ + *key = next_key->key; + *data = next_key->pdata; + + __hash_rw_reader_unlock(h); + + /* Increment iterator */ + (*next)++; + return position - 1; } diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index b43f467d..5dfbbc48 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2016 Intel Corporation + * Copyright(c) 2018 Arm Limited */ /* rte_cuckoo_hash.h @@ -104,8 +105,6 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { #define LCORE_CACHE_SIZE 64 -#define RTE_HASH_MAX_PUSHES 100 - #define RTE_HASH_BFS_QUEUE_MAX_LEN 1000 #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4 @@ -125,25 +124,24 @@ struct rte_hash_key { }; /* Variable key size */ char key[0]; -} __attribute__((aligned(KEY_ALIGNMENT))); +}; /* All different signature compare functions */ enum rte_hash_sig_compare_function { RTE_HASH_COMPARE_SCALAR = 0, RTE_HASH_COMPARE_SSE, - RTE_HASH_COMPARE_AVX2, RTE_HASH_COMPARE_NUM }; /** Bucket structure */ struct rte_hash_bucket { - hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES]; + uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES]; uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES]; - hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES]; - uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; + + void *next; } __rte_cache_aligned; /** A hash table structure. */ @@ -164,10 +162,23 @@ struct rte_hash { /**< Length of hash key. */ uint8_t hw_trans_mem_support; /**< If hardware transactional memory is used. */ - uint8_t multi_writer_support; - /**< If multi-writer support is enabled. */ + uint8_t use_local_cache; + /**< If multi-writer support is enabled, use local cache + * to allocate key-store slots. + */ uint8_t readwrite_concur_support; /**< If read-write concurrency support is enabled */ + uint8_t ext_table_support; /**< Enable extendable bucket table */ + uint8_t no_free_on_del; + /**< If key index should be freed on calling rte_hash_del_xxx APIs. + * If this is set, rte_hash_free_key_with_position must be called to + * free the key index associated with the deleted entry. + * This flag is enabled by default. + */ + uint8_t readwrite_concur_lf_support; + /**< If read-write concurrency lock free support is enabled */ + uint8_t writer_takes_lock; + /**< Indicates if the writer threads need to take lock */ rte_hash_function hash_func; /**< Function used to calculate hash. */ uint32_t hash_func_init_val; /**< Init value used by hash_func. 
*/ rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; @@ -186,10 +197,15 @@ struct rte_hash { * to the key table. */ rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */ + struct rte_hash_bucket *buckets_ext; /**< Extra buckets array */ + struct rte_ring *free_ext_bkts; /**< Ring of indexes of free buckets */ + uint32_t *tbl_chng_cnt; + /**< Indicates if the hash table changed from last read. */ } __rte_cache_aligned; struct queue_node { struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */ + uint32_t cur_bkt_idx; struct queue_node *prev; /* Parent(bucket) in search path */ int prev_slot; /* Parent(slot) in search path */ diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index 9e7d9315..c93d1a13 100644 --- a/lib/librte_hash/rte_hash.h +++ b/lib/librte_hash/rte_hash.h @@ -14,6 +14,8 @@ #include <stdint.h> #include <stddef.h> +#include <rte_compat.h> + #ifdef __cplusplus extern "C" { #endif @@ -37,7 +39,27 @@ extern "C" { /** Flag to support reader writer concurrency */ #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04 -/** Signature of key that is stored internally. */ +/** Flag to indicate the extendabe bucket table feature should be used */ +#define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08 + +/** Flag to disable freeing of key index on hash delete. + * Refer to rte_hash_del_xxx APIs for more details. + * This is enabled by default when RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF + * is enabled. + */ +#define RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL 0x10 + +/** Flag to support lock free reader writer concurrency. Both single writer + * and multi writer use cases are supported. + * Currently, extendable bucket table feature is not supported with + * this feature. + */ +#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF 0x20 + +/** + * The type of hash value of a key. + * It should be a value of at least 32bit with fully random pattern. + */ typedef uint32_t hash_sig_t; /** Type of function that can be used for calculating the hash value. */ @@ -119,7 +141,12 @@ void rte_hash_free(struct rte_hash *h); /** - * Reset all hash structure, by zeroing all entries + * Reset all hash structure, by zeroing all entries. + * When RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled, + * it is application's responsibility to make sure that + * none of the readers are referencing the hash table + * while calling this API. + * * @param h * Hash table to reset */ @@ -143,6 +170,11 @@ rte_hash_count(const struct rte_hash *h); * and should only be called from one thread by default. * Thread safety can be enabled by setting flag during * table creation. + * If the key exists already in the table, this API updates its value + * with 'data' passed in this API. It is the responsibility of + * the application to manage any memory associated with the old value. + * The readers might still be using the old value even after this API + * has returned. * * @param h * Hash table to add the key to. @@ -165,6 +197,11 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data); * and should only be called from one thread by default. * Thread safety can be enabled by setting flag during * table creation. + * If the key exists already in the table, this API updates its value + * with 'data' passed in this API. It is the responsibility of + * the application to manage any memory associated with the old value. + * The readers might still be using the old value even after this API + * has returned. * * @param h * Hash table to add the key to. 
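(Illustrative sketch, not part of the diff above.) The new RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF and RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL flags introduced in this patch change how readers and writers interact with the table. The following minimal usage sketch assumes a 32-bit key, rte_jhash as the hash function and example table size/name; the calls themselves (rte_hash_create, rte_hash_add_key_data, rte_hash_lookup_data) are the public API documented in rte_hash.h.

/* Illustrative sketch only -- not part of the patch. */
#include <rte_hash.h>
#include <rte_jhash.h>

static struct rte_hash *
make_lf_table(void)
{
	struct rte_hash_parameters params = {
		.name = "flow_table",            /* example name */
		.entries = 1024,                 /* example size */
		.key_len = sizeof(uint32_t),     /* example 32-bit key */
		.hash_func = rte_jhash,
		.hash_func_init_val = 0,
		.socket_id = 0,
		/* Lock-free readers; NO_FREE_ON_DEL is implied by this flag. */
		.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
	};
	return rte_hash_create(&params);
}

static void
add_and_lookup(struct rte_hash *h, uint32_t key, void *value)
{
	void *data = NULL;

	/* Adding an existing key updates its value; readers may still
	 * observe the old value for a while, as the API comments note.
	 */
	if (rte_hash_add_key_data(h, &key, value) < 0)
		return;

	if (rte_hash_lookup_data(h, &key, &data) >= 0)
		(void)data; /* hit: data holds the stored value */
}

With the lock-free flag, deletion only unlinks the key from the bucket; the key-store index must later be returned with rte_hash_free_key_with_position() once no reader can still reference it, as documented in the hunks that follow.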
@@ -230,6 +267,14 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t * and should only be called from one thread by default. * Thread safety can be enabled by setting flag during * table creation. + * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or + * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled, + * the key index returned by rte_hash_add_key_xxx APIs will not be + * freed by this API. rte_hash_free_key_with_position API must be called + * additionally to free the index associated with the key. + * rte_hash_free_key_with_position API should be called after all + * the readers have stopped referencing the entry corresponding to + * this key. RCU mechanisms could be used to determine such a state. * * @param h * Hash table to remove the key from. @@ -251,6 +296,14 @@ rte_hash_del_key(const struct rte_hash *h, const void *key); * and should only be called from one thread by default. * Thread safety can be enabled by setting flag during * table creation. + * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or + * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled, + * the key index returned by rte_hash_add_key_xxx APIs will not be + * freed by this API. rte_hash_free_key_with_position API must be called + * additionally to free the index associated with the key. + * rte_hash_free_key_with_position API should be called after all + * the readers have stopped referencing the entry corresponding to + * this key. RCU mechanisms could be used to determine such a state. * * @param h * Hash table to remove the key from. @@ -290,6 +343,34 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position, void **key); /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Free a hash key in the hash table given the position + * of the key. This operation is not multi-thread safe and should + * only be called from one thread by default. Thread safety + * can be enabled by setting flag during table creation. + * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or + * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled, + * the key index returned by rte_hash_del_key_xxx APIs must be freed + * using this API. This API should be called after all the readers + * have stopped referencing the entry corresponding to this key. + * RCU mechanisms could be used to determine such a state. + * This API does not validate if the key is already freed. + * + * @param h + * Hash table to free the key from. + * @param position + * Position returned when the key was deleted. + * @return + * - 0 if freed successfully + * - -EINVAL if the parameters are invalid. + */ +int __rte_experimental +rte_hash_free_key_with_position(const struct rte_hash *h, + const int32_t position); + +/** * Find a key-value pair in the hash table. * This operation is multi-thread safe with regarding to other lookup threads. 
* Read-write concurrency can be enabled by setting flag during diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map index e216ac8e..734ae28b 100644 --- a/lib/librte_hash/rte_hash_version.map +++ b/lib/librte_hash/rte_hash_version.map @@ -53,3 +53,10 @@ DPDK_18.08 { rte_hash_count; } DPDK_16.07; + +EXPERIMENTAL { + global: + + rte_hash_free_key_with_position; + +}; diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h index 197acf8d..0f62e2e1 100644 --- a/lib/librte_ip_frag/ip_frag_common.h +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -25,6 +25,12 @@ #define IPv6_KEY_BYTES_FMT \ "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 +#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) +#else +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* IP_FRAG_TBL_STAT */ + /* internal functions declarations */ struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, @@ -69,10 +75,11 @@ ip_frag_key_invalidate(struct ip_frag_key * key) } /* compare two keys */ -static inline int +static inline uint64_t ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2) { - uint32_t i, val; + uint32_t i; + uint64_t val; val = k1->id ^ k2->id; for (i = 0; i < k1->key_len; i++) val |= k1->src_dst[i] ^ k2->src_dst[i]; @@ -149,4 +156,16 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms) fp->frags[IP_FIRST_FRAG_IDX] = zero_frag; } +/* local frag table helper functions */ +static inline void +ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + struct ip_frag_pkt *fp) +{ + ip_frag_free(fp, dr); + ip_frag_key_invalidate(&fp->key); + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); +} + #endif /* _IP_FRAG_COMMON_H_ */ diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c index 2560c771..97470a87 100644 --- a/lib/librte_ip_frag/ip_frag_internal.c +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -14,24 +14,6 @@ #define IP_FRAG_TBL_POS(tbl, sig) \ ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) -#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT -#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) -#else -#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) -#endif /* IP_FRAG_TBL_STAT */ - -/* local frag table helper functions */ -static inline void -ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, - struct ip_frag_pkt *fp) -{ - ip_frag_free(fp, dr); - ip_frag_key_invalidate(&fp->key); - TAILQ_REMOVE(&tbl->lru, fp, lru); - tbl->use_entries--; - IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); -} - static inline void ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct ip_frag_pkt *fp, const struct ip_frag_key *key, uint64_t tms) diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index b3f3f78d..7f425f61 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -65,10 +65,13 @@ struct ip_frag_pkt { #define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */ +/* death row size in mbufs */ +#define IP_FRAG_DEATH_ROW_MBUF_LEN (IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)) + /** mbuf death row (packets to be freed) */ struct rte_ip_frag_death_row { uint32_t cnt; /**< number of mbufs currently on death row */ - struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)]; + struct rte_mbuf 
*row[IP_FRAG_DEATH_ROW_MBUF_LEN]; /**< mbufs to be freed */ }; @@ -325,6 +328,20 @@ void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, void rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); +/** + * Delete expired fragments + * + * @param tbl + * Table to delete expired fragments from + * @param dr + * Death row to free buffers to + * @param tms + * Current timestamp + */ +void __rte_experimental +rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, uint64_t tms); + #ifdef __cplusplus } #endif diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c index 659a1795..a23f6f24 100644 --- a/lib/librte_ip_frag/rte_ip_frag_common.c +++ b/lib/librte_ip_frag/rte_ip_frag_common.c @@ -121,3 +121,24 @@ rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl) fail_nospace, fail_total - fail_nospace); } + +/* Delete expired fragments */ +void __rte_experimental +rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, uint64_t tms) +{ + uint64_t max_cycles; + struct ip_frag_pkt *fp; + + max_cycles = tbl->max_cycles; + + TAILQ_FOREACH(fp, &tbl->lru, lru) + if (max_cycles + fp->start < tms) { + /* check that death row has enough space */ + if (IP_FRAG_DEATH_ROW_MBUF_LEN - dr->cnt >= fp->last_idx) + ip_frag_tbl_del(tbl, dr, fp); + else + return; + } else + return; +} diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map index d1acf07c..d40d5515 100644 --- a/lib/librte_ip_frag/rte_ip_frag_version.map +++ b/lib/librte_ip_frag/rte_ip_frag_version.map @@ -18,3 +18,9 @@ DPDK_17.08 { rte_ip_frag_table_destroy; } DPDK_2.0; + +EXPERIMENTAL { + global: + + rte_frag_table_del_expired_entries; +}; diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index 65f6a2b0..c9726d4f 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -18,6 +18,9 @@ #include <rte_log.h> #include <rte_kni.h> #include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_rwlock.h> +#include <rte_eal_memconfig.h> #include <exec-env/rte_kni_common.h> #include "rte_kni_fifo.h" @@ -30,7 +33,23 @@ #define KNI_REQUEST_MBUF_NUM_MAX 32 -#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0) +#define KNI_MEM_CHECK(cond, fail) do { if (cond) goto fail; } while (0) + +#define KNI_MZ_NAME_FMT "kni_info_%s" +#define KNI_TX_Q_MZ_NAME_FMT "kni_tx_%s" +#define KNI_RX_Q_MZ_NAME_FMT "kni_rx_%s" +#define KNI_ALLOC_Q_MZ_NAME_FMT "kni_alloc_%s" +#define KNI_FREE_Q_MZ_NAME_FMT "kni_free_%s" +#define KNI_REQ_Q_MZ_NAME_FMT "kni_req_%s" +#define KNI_RESP_Q_MZ_NAME_FMT "kni_resp_%s" +#define KNI_SYNC_ADDR_MZ_NAME_FMT "kni_sync_%s" + +TAILQ_HEAD(rte_kni_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_kni_tailq = { + .name = "RTE_KNI", +}; +EAL_REGISTER_TAILQ(rte_kni_tailq) /** * KNI context @@ -42,18 +61,26 @@ struct rte_kni { struct rte_mempool *pktmbuf_pool; /**< pkt mbuf mempool */ unsigned mbuf_size; /**< mbuf size */ + const struct rte_memzone *m_tx_q; /**< TX queue memzone */ + const struct rte_memzone *m_rx_q; /**< RX queue memzone */ + const struct rte_memzone *m_alloc_q;/**< Alloc queue memzone */ + const struct rte_memzone *m_free_q; /**< Free queue memzone */ + struct rte_kni_fifo *tx_q; /**< TX queue */ struct rte_kni_fifo *rx_q; /**< RX queue */ struct rte_kni_fifo *alloc_q; /**< Allocated mbufs queue */ struct rte_kni_fifo *free_q; /**< To be freed mbufs queue 
*/ + const struct rte_memzone *m_req_q; /**< Request queue memzone */ + const struct rte_memzone *m_resp_q; /**< Response queue memzone */ + const struct rte_memzone *m_sync_addr;/**< Sync addr memzone */ + /* For request & response */ struct rte_kni_fifo *req_q; /**< Request queue */ struct rte_kni_fifo *resp_q; /**< Response queue */ void * sync_addr; /**< Req/Resp Mem address */ struct rte_kni_ops ops; /**< operations for request */ - uint8_t in_use : 1; /**< kni in use */ }; enum kni_ops_status { @@ -61,231 +88,111 @@ enum kni_ops_status { KNI_REQ_REGISTERED, }; -/** - * KNI memzone pool slot - */ -struct rte_kni_memzone_slot { - uint32_t id; - uint8_t in_use : 1; /**< slot in use */ - - /* Memzones */ - const struct rte_memzone *m_ctx; /**< KNI ctx */ - const struct rte_memzone *m_tx_q; /**< TX queue */ - const struct rte_memzone *m_rx_q; /**< RX queue */ - const struct rte_memzone *m_alloc_q; /**< Allocated mbufs queue */ - const struct rte_memzone *m_free_q; /**< To be freed mbufs queue */ - const struct rte_memzone *m_req_q; /**< Request queue */ - const struct rte_memzone *m_resp_q; /**< Response queue */ - const struct rte_memzone *m_sync_addr; - - /* Free linked list */ - struct rte_kni_memzone_slot *next; /**< Next slot link.list */ -}; - -/** - * KNI memzone pool - */ -struct rte_kni_memzone_pool { - uint8_t initialized : 1; /**< Global KNI pool init flag */ - - uint32_t max_ifaces; /**< Max. num of KNI ifaces */ - struct rte_kni_memzone_slot *slots; /**< Pool slots */ - rte_spinlock_t mutex; /**< alloc/release mutex */ - - /* Free memzone slots linked-list */ - struct rte_kni_memzone_slot *free; /**< First empty slot */ - struct rte_kni_memzone_slot *free_tail; /**< Last empty slot */ -}; - - static void kni_free_mbufs(struct rte_kni *kni); static void kni_allocate_mbufs(struct rte_kni *kni); static volatile int kni_fd = -1; -static struct rte_kni_memzone_pool kni_memzone_pool = { - .initialized = 0, -}; -static const struct rte_memzone * -kni_memzone_reserve(const char *name, size_t len, int socket_id, - unsigned flags) +/* Shall be called before any allocation happens */ +int +rte_kni_init(unsigned int max_kni_ifaces __rte_unused) { - const struct rte_memzone *mz = rte_memzone_lookup(name); - - if (mz == NULL) - mz = rte_memzone_reserve(name, len, socket_id, flags); + /* Check FD and open */ + if (kni_fd < 0) { + kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); + if (kni_fd < 0) { + RTE_LOG(ERR, KNI, + "Can not open /dev/%s\n", KNI_DEVICE); + return -1; + } + } - return mz; + return 0; } -/* Pool mgmt */ -static struct rte_kni_memzone_slot* -kni_memzone_pool_alloc(void) +static struct rte_kni * +__rte_kni_get(const char *name) { - struct rte_kni_memzone_slot *slot; + struct rte_kni *kni; + struct rte_tailq_entry *te; + struct rte_kni_list *kni_list; - rte_spinlock_lock(&kni_memzone_pool.mutex); + kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list); - if (!kni_memzone_pool.free) { - rte_spinlock_unlock(&kni_memzone_pool.mutex); - return NULL; + TAILQ_FOREACH(te, kni_list, next) { + kni = te->data; + if (strncmp(name, kni->name, RTE_KNI_NAMESIZE) == 0) + break; } - slot = kni_memzone_pool.free; - kni_memzone_pool.free = slot->next; - slot->in_use = 1; + if (te == NULL) + kni = NULL; - if (!kni_memzone_pool.free) - kni_memzone_pool.free_tail = NULL; - - rte_spinlock_unlock(&kni_memzone_pool.mutex); - - return slot; + return kni; } -static void -kni_memzone_pool_release(struct rte_kni_memzone_slot *slot) +static int +kni_reserve_mz(struct rte_kni *kni) { - 
rte_spinlock_lock(&kni_memzone_pool.mutex); + char mz_name[RTE_MEMZONE_NAMESIZE]; - if (kni_memzone_pool.free) - kni_memzone_pool.free_tail->next = slot; - else - kni_memzone_pool.free = slot; + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_TX_Q_MZ_NAME_FMT, kni->name); + kni->m_tx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_tx_q == NULL, tx_q_fail); - kni_memzone_pool.free_tail = slot; - slot->next = NULL; - slot->in_use = 0; + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RX_Q_MZ_NAME_FMT, kni->name); + kni->m_rx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_rx_q == NULL, rx_q_fail); - rte_spinlock_unlock(&kni_memzone_pool.mutex); -} + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_ALLOC_Q_MZ_NAME_FMT, kni->name); + kni->m_alloc_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_alloc_q == NULL, alloc_q_fail); + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_FREE_Q_MZ_NAME_FMT, kni->name); + kni->m_free_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_free_q == NULL, free_q_fail); -/* Shall be called before any allocation happens */ -void -rte_kni_init(unsigned int max_kni_ifaces) -{ - uint32_t i; - struct rte_kni_memzone_slot *it; - const struct rte_memzone *mz; -#define OBJNAMSIZ 32 - char obj_name[OBJNAMSIZ]; - char mz_name[RTE_MEMZONE_NAMESIZE]; - - /* Immediately return if KNI is already initialized */ - if (kni_memzone_pool.initialized) { - RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()"); - return; - } + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_REQ_Q_MZ_NAME_FMT, kni->name); + kni->m_req_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_req_q == NULL, req_q_fail); - if (max_kni_ifaces == 0) { - RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n", - max_kni_ifaces); - RTE_LOG(ERR, KNI, "Unable to initialize KNI\n"); - return; - } + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RESP_Q_MZ_NAME_FMT, kni->name); + kni->m_resp_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_resp_q == NULL, resp_q_fail); - /* Check FD and open */ - if (kni_fd < 0) { - kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); - if (kni_fd < 0) { - RTE_LOG(ERR, KNI, - "Can not open /dev/%s\n", KNI_DEVICE); - return; - } - } + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_SYNC_ADDR_MZ_NAME_FMT, kni->name); + kni->m_sync_addr = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(kni->m_sync_addr == NULL, sync_addr_fail); - /* Allocate slot objects */ - kni_memzone_pool.slots = (struct rte_kni_memzone_slot *) - rte_malloc(NULL, - sizeof(struct rte_kni_memzone_slot) * - max_kni_ifaces, - 0); - KNI_MEM_CHECK(kni_memzone_pool.slots == NULL); - - /* Initialize general pool variables */ - kni_memzone_pool.initialized = 1; - kni_memzone_pool.max_ifaces = max_kni_ifaces; - kni_memzone_pool.free = &kni_memzone_pool.slots[0]; - rte_spinlock_init(&kni_memzone_pool.mutex); - - /* Pre-allocate all memzones of all the slots; panic on error */ - for (i = 0; i < max_kni_ifaces; i++) { - - /* Recover current slot */ - it = &kni_memzone_pool.slots[i]; - it->id = i; - - /* Allocate KNI context */ - snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i); - mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni), - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_ctx = mz; - - /* TX RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i); - mz = 
kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_tx_q = mz; - - /* RX RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_rx_q = mz; - - /* ALLOC RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_alloc_q = mz; - - /* FREE RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_free_q = mz; - - /* Request RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_req_q = mz; - - /* Response RING */ - snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_resp_q = mz; - - /* Req/Resp sync mem area */ - snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i); - mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, - SOCKET_ID_ANY, 0); - KNI_MEM_CHECK(mz == NULL); - it->m_sync_addr = mz; - - if ((i+1) == max_kni_ifaces) { - it->next = NULL; - kni_memzone_pool.free_tail = it; - } else - it->next = &kni_memzone_pool.slots[i+1]; - } - - return; + return 0; -kni_fail: - RTE_LOG(ERR, KNI, "Unable to allocate memory for max_kni_ifaces:%d." - "Increase the amount of hugepages memory\n", max_kni_ifaces); +sync_addr_fail: + rte_memzone_free(kni->m_resp_q); +resp_q_fail: + rte_memzone_free(kni->m_req_q); +req_q_fail: + rte_memzone_free(kni->m_free_q); +free_q_fail: + rte_memzone_free(kni->m_alloc_q); +alloc_q_fail: + rte_memzone_free(kni->m_rx_q); +rx_q_fail: + rte_memzone_free(kni->m_tx_q); +tx_q_fail: + return -1; } +static void +kni_release_mz(struct rte_kni *kni) +{ + rte_memzone_free(kni->m_tx_q); + rte_memzone_free(kni->m_rx_q); + rte_memzone_free(kni->m_alloc_q); + rte_memzone_free(kni->m_free_q); + rte_memzone_free(kni->m_req_q); + rte_memzone_free(kni->m_resp_q); + rte_memzone_free(kni->m_sync_addr); +} struct rte_kni * rte_kni_alloc(struct rte_mempool *pktmbuf_pool, @@ -294,41 +201,45 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, { int ret; struct rte_kni_device_info dev_info; - struct rte_kni *ctx; - char intf_name[RTE_KNI_NAMESIZE]; - const struct rte_memzone *mz; - struct rte_kni_memzone_slot *slot = NULL; + struct rte_kni *kni; + struct rte_tailq_entry *te; + struct rte_kni_list *kni_list; if (!pktmbuf_pool || !conf || !conf->name[0]) return NULL; /* Check if KNI subsystem has been initialized */ - if (kni_memzone_pool.initialized != 1) { + if (kni_fd < 0) { RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. 
Invoke rte_kni_init() first\n"); return NULL; } - /* Get an available slot from the pool */ - slot = kni_memzone_pool_alloc(); - if (!slot) { - RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unused ones.\n", - kni_memzone_pool.max_ifaces); - return NULL; + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + kni = __rte_kni_get(conf->name); + if (kni != NULL) { + RTE_LOG(ERR, KNI, "KNI already exists\n"); + goto unlock; } - /* Recover ctx */ - ctx = slot->m_ctx->addr; - snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name); + te = rte_zmalloc("KNI_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, KNI, "Failed to allocate tailq entry\n"); + goto unlock; + } - if (ctx->in_use) { - RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name); - return NULL; + kni = rte_zmalloc("KNI", sizeof(struct rte_kni), RTE_CACHE_LINE_SIZE); + if (kni == NULL) { + RTE_LOG(ERR, KNI, "KNI memory allocation failed\n"); + goto kni_fail; } - memset(ctx, 0, sizeof(struct rte_kni)); + + snprintf(kni->name, RTE_KNI_NAMESIZE, "%s", conf->name); + if (ops) - memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops)); + memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops)); else - ctx->ops.port_id = UINT16_MAX; + kni->ops.port_id = UINT16_MAX; memset(&dev_info, 0, sizeof(dev_info)); dev_info.bus = conf->addr.bus; @@ -344,72 +255,79 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool, memcpy(dev_info.mac_addr, conf->mac_addr, ETHER_ADDR_LEN); - snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name); - snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name); + snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", conf->name); RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n", dev_info.bus, dev_info.devid, dev_info.function, dev_info.vendor_id, dev_info.device_id); + + ret = kni_reserve_mz(kni); + if (ret < 0) + goto mz_fail; + /* TX RING */ - mz = slot->m_tx_q; - ctx->tx_q = mz->addr; - kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX); - dev_info.tx_phys = mz->phys_addr; + kni->tx_q = kni->m_tx_q->addr; + kni_fifo_init(kni->tx_q, KNI_FIFO_COUNT_MAX); + dev_info.tx_phys = kni->m_tx_q->phys_addr; /* RX RING */ - mz = slot->m_rx_q; - ctx->rx_q = mz->addr; - kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX); - dev_info.rx_phys = mz->phys_addr; + kni->rx_q = kni->m_rx_q->addr; + kni_fifo_init(kni->rx_q, KNI_FIFO_COUNT_MAX); + dev_info.rx_phys = kni->m_rx_q->phys_addr; /* ALLOC RING */ - mz = slot->m_alloc_q; - ctx->alloc_q = mz->addr; - kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX); - dev_info.alloc_phys = mz->phys_addr; + kni->alloc_q = kni->m_alloc_q->addr; + kni_fifo_init(kni->alloc_q, KNI_FIFO_COUNT_MAX); + dev_info.alloc_phys = kni->m_alloc_q->phys_addr; /* FREE RING */ - mz = slot->m_free_q; - ctx->free_q = mz->addr; - kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX); - dev_info.free_phys = mz->phys_addr; + kni->free_q = kni->m_free_q->addr; + kni_fifo_init(kni->free_q, KNI_FIFO_COUNT_MAX); + dev_info.free_phys = kni->m_free_q->phys_addr; /* Request RING */ - mz = slot->m_req_q; - ctx->req_q = mz->addr; - kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX); - dev_info.req_phys = mz->phys_addr; + kni->req_q = kni->m_req_q->addr; + kni_fifo_init(kni->req_q, KNI_FIFO_COUNT_MAX); + dev_info.req_phys = kni->m_req_q->phys_addr; /* Response RING */ - mz = slot->m_resp_q; - ctx->resp_q = mz->addr; - kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX); - dev_info.resp_phys = mz->phys_addr; + kni->resp_q = kni->m_resp_q->addr; + kni_fifo_init(kni->resp_q, KNI_FIFO_COUNT_MAX); + 
dev_info.resp_phys = kni->m_resp_q->phys_addr; /* Req/Resp sync mem area */ - mz = slot->m_sync_addr; - ctx->sync_addr = mz->addr; - dev_info.sync_va = mz->addr; - dev_info.sync_phys = mz->phys_addr; + kni->sync_addr = kni->m_sync_addr->addr; + dev_info.sync_va = kni->m_sync_addr->addr; + dev_info.sync_phys = kni->m_sync_addr->phys_addr; - ctx->pktmbuf_pool = pktmbuf_pool; - ctx->group_id = conf->group_id; - ctx->slot_id = slot->id; - ctx->mbuf_size = conf->mbuf_size; + kni->pktmbuf_pool = pktmbuf_pool; + kni->group_id = conf->group_id; + kni->mbuf_size = conf->mbuf_size; ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info); - KNI_MEM_CHECK(ret < 0); + if (ret < 0) + goto ioctl_fail; + + te->data = kni; + + kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list); + TAILQ_INSERT_TAIL(kni_list, te, next); - ctx->in_use = 1; + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); /* Allocate mbufs and then put them into alloc_q */ - kni_allocate_mbufs(ctx); + kni_allocate_mbufs(kni); - return ctx; + return kni; +ioctl_fail: + kni_release_mz(kni); +mz_fail: + rte_free(kni); kni_fail: - if (slot) - kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]); + rte_free(te); +unlock: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); return NULL; } @@ -462,19 +380,36 @@ kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo) int rte_kni_release(struct rte_kni *kni) { + struct rte_tailq_entry *te; + struct rte_kni_list *kni_list; struct rte_kni_device_info dev_info; - uint32_t slot_id; uint32_t retry = 5; - if (!kni || !kni->in_use) + if (!kni) return -1; + kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, kni_list, next) { + if (te->data == kni) + break; + } + + if (te == NULL) + goto unlock; + snprintf(dev_info.name, sizeof(dev_info.name), "%s", kni->name); if (ioctl(kni_fd, RTE_KNI_IOCTL_RELEASE, &dev_info) < 0) { RTE_LOG(ERR, KNI, "Fail to release kni device\n"); - return -1; + goto unlock; } + TAILQ_REMOVE(kni_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + /* mbufs in all fifo should be released, except request/response */ /* wait until all rxq packets processed by kernel */ @@ -488,20 +423,18 @@ rte_kni_release(struct rte_kni *kni) kni_free_fifo(kni->tx_q); kni_free_fifo(kni->free_q); - slot_id = kni->slot_id; + kni_release_mz(kni); - /* Memset the KNI struct */ - memset(kni, 0, sizeof(struct rte_kni)); + rte_free(kni); - /* Release memzone */ - if (slot_id > kni_memzone_pool.max_ifaces) { - RTE_LOG(ERR, KNI, "KNI pool: corrupted slot ID: %d, max: %d\n", - slot_id, kni_memzone_pool.max_ifaces); - return -1; - } - kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]); + rte_free(te); return 0; + +unlock: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return -1; } /* default callback for request of configuring device mac address */ @@ -711,24 +644,18 @@ kni_allocate_mbufs(struct rte_kni *kni) struct rte_kni * rte_kni_get(const char *name) { - uint32_t i; - struct rte_kni_memzone_slot *it; struct rte_kni *kni; if (name == NULL || name[0] == '\0') return NULL; - /* Note: could be improved perf-wise if necessary */ - for (i = 0; i < kni_memzone_pool.max_ifaces; i++) { - it = &kni_memzone_pool.slots[i]; - if (it->in_use == 0) - continue; - kni = it->m_ctx->addr; - if (strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0) - return kni; - } + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); - return NULL; + kni = __rte_kni_get(name); + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + 
return kni; } const char * @@ -790,6 +717,47 @@ rte_kni_unregister_handlers(struct rte_kni *kni) return 0; } + +int __rte_experimental +rte_kni_update_link(struct rte_kni *kni, unsigned int linkup) +{ + char path[64]; + char old_carrier[2]; + const char *new_carrier; + int old_linkup; + int fd, ret; + + if (kni == NULL) + return -1; + + snprintf(path, sizeof(path), "/sys/devices/virtual/net/%s/carrier", + kni->name); + + fd = open(path, O_RDWR); + if (fd == -1) { + RTE_LOG(ERR, KNI, "Failed to open file: %s.\n", path); + return -1; + } + + ret = read(fd, old_carrier, 2); + if (ret < 1) { + close(fd); + return -1; + } + old_linkup = (old_carrier[0] == '1'); + + new_carrier = linkup ? "1" : "0"; + ret = write(fd, new_carrier, 1); + if (ret < 1) { + RTE_LOG(ERR, KNI, "Failed to write file: %s.\n", path); + close(fd); + return -1; + } + + close(fd); + return old_linkup; +} + void rte_kni_close(void) { diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h index 99055e2c..02ca43b4 100644 --- a/lib/librte_kni/rte_kni.h +++ b/lib/librte_kni/rte_kni.h @@ -81,8 +81,12 @@ struct rte_kni_conf { * * @param max_kni_ifaces * The maximum number of KNI interfaces that can coexist concurrently + * + * @return + * - 0 indicates success. + * - negative value indicates failure. */ -void rte_kni_init(unsigned int max_kni_ifaces); +int rte_kni_init(unsigned int max_kni_ifaces); /** @@ -229,6 +233,26 @@ int rte_kni_register_handlers(struct rte_kni *kni, struct rte_kni_ops *ops); int rte_kni_unregister_handlers(struct rte_kni *kni); /** + * Update link carrier state for KNI port. + * + * Update the linkup/linkdown state of a KNI interface in the kernel. + * + * @param kni + * pointer to struct rte_kni. + * @param linkup + * New link state: + * 0 for linkdown. + * > 0 for linkup. + * + * @return + * On failure: -1 + * Previous link state == linkdown: 0 + * Previous link state == linkup: 1 + */ +int __rte_experimental +rte_kni_update_link(struct rte_kni *kni, unsigned int linkup); + +/** * Close KNI device. */ void rte_kni_close(void); diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h index ac26a8c0..287d7deb 100644 --- a/lib/librte_kni/rte_kni_fifo.h +++ b/lib/librte_kni/rte_kni_fifo.h @@ -5,6 +5,36 @@ /** + * @internal when c11 memory model enabled use c11 atomic memory barrier. + * when under non c11 memory model use rte_smp_* memory barrier. + * + * @param src + * Pointer to the source data. + * @param dst + * Pointer to the destination data. + * @param value + * Data value. 
+ */ +#ifdef RTE_USE_C11_MEM_MODEL +#define __KNI_LOAD_ACQUIRE(src) ({ \ + __atomic_load_n((src), __ATOMIC_ACQUIRE); \ + }) +#define __KNI_STORE_RELEASE(dst, value) do { \ + __atomic_store_n((dst), value, __ATOMIC_RELEASE); \ + } while(0) +#else +#define __KNI_LOAD_ACQUIRE(src) ({ \ + typeof (*(src)) val = *(src); \ + rte_smp_rmb(); \ + val; \ + }) +#define __KNI_STORE_RELEASE(dst, value) do { \ + *(dst) = value; \ + rte_smp_wmb(); \ + } while(0) +#endif + +/** * Initializes the kni fifo structure */ static void @@ -28,8 +58,8 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) { unsigned i = 0; unsigned fifo_write = fifo->write; - unsigned fifo_read = fifo->read; unsigned new_write = fifo_write; + unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read); for (i = 0; i < num; i++) { new_write = (new_write + 1) & (fifo->len - 1); @@ -39,7 +69,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) fifo->buffer[fifo_write] = data[i]; fifo_write = new_write; } - fifo->write = fifo_write; + __KNI_STORE_RELEASE(&fifo->write, fifo_write); return i; } @@ -51,7 +81,8 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) { unsigned i = 0; unsigned new_read = fifo->read; - unsigned fifo_write = fifo->write; + unsigned fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write); + for (i = 0; i < num; i++) { if (new_read == fifo_write) break; @@ -59,7 +90,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) data[i] = fifo->buffer[new_read]; new_read = (new_read + 1) & (fifo->len - 1); } - fifo->read = new_read; + __KNI_STORE_RELEASE(&fifo->read, new_read); return i; } @@ -69,5 +100,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) static inline uint32_t kni_fifo_count(struct rte_kni_fifo *fifo) { - return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); + unsigned fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write); + unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read); + return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1); } diff --git a/lib/librte_kni/rte_kni_version.map b/lib/librte_kni/rte_kni_version.map index acd515eb..c877dc6a 100644 --- a/lib/librte_kni/rte_kni_version.map +++ b/lib/librte_kni/rte_kni_version.map @@ -15,3 +15,9 @@ DPDK_2.0 { local: *; }; + +EXPERIMENTAL { + global: + + rte_kni_update_link; +}; diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index a28f7694..f7030c63 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -44,6 +44,20 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) kvlist->pairs[i].value == NULL) return -1; + /* Detect list [a,b] to skip comma delimiter in list. */ + str = kvlist->pairs[i].value; + if (str[0] == '[') { + /* Find the end of the list. */ + while (str[strlen(str) - 1] != ']') { + /* Restore the comma erased by strtok_r(). */ + str[strlen(str)] = ','; + /* Parse until next comma. 
*/ + str = strtok_r(NULL, RTE_KVARGS_PAIRS_DELIM, &ctx1); + if (str == NULL) + return -1; /* no closing bracket */ + } + } + kvlist->count++; str = NULL; } @@ -120,6 +134,9 @@ rte_kvargs_process(const struct rte_kvargs *kvlist, const struct rte_kvargs_pair *pair; unsigned i; + if (kvlist == NULL) + return 0; + for (i = 0; i < kvlist->count; i++) { pair = &kvlist->pairs[i]; if (key_match == NULL || strcmp(pair->key, key_match) == 0) { diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h index fc041956..1946195d 100644 --- a/lib/librte_kvargs/rte_kvargs.h +++ b/lib/librte_kvargs/rte_kvargs.h @@ -110,7 +110,7 @@ struct rte_kvargs *rte_kvargs_parse_delim(const char *args, * rte_kvargs_parse(). * * @param kvlist - * The rte_kvargs structure + * The rte_kvargs structure. No error if NULL. */ void rte_kvargs_free(struct rte_kvargs *kvlist); @@ -119,11 +119,10 @@ void rte_kvargs_free(struct rte_kvargs *kvlist); * * For each key/value association that matches the given key, calls the * handler function with the for a given arg_name passing the value on the - * dictionary for that key and a given extra argument. If *kvlist* is NULL - * function does nothing. + * dictionary for that key and a given extra argument. * * @param kvlist - * The rte_kvargs structure + * The rte_kvargs structure. No error if NULL. * @param key_match * The key on which the handler should be called, or NULL to process handler * on all associations diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c index 1fdec68e..5715549e 100644 --- a/lib/librte_latencystats/rte_latencystats.c +++ b/lib/librte_latencystats/rte_latencystats.c @@ -125,8 +125,11 @@ add_time_stamps(uint16_t pid __rte_unused, for (i = 0; i < nb_pkts; i++) { diff_tsc = now - prev_tsc; timer_tsc += diff_tsc; - if (timer_tsc >= samp_intvl) { + + if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) == 0 + && (timer_tsc >= samp_intvl)) { pkts[i]->timestamp = now; + pkts[i]->ol_flags |= PKT_RX_TIMESTAMP; timer_tsc = 0; } prev_tsc = now; @@ -156,7 +159,7 @@ calc_latency(uint16_t pid __rte_unused, now = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { - if (pkts[i]->timestamp) + if (pkts[i]->ol_flags & PKT_RX_TIMESTAMP) latency[cnt++] = now - pkts[i]->timestamp; } diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile index 482bd72e..a7946a1c 100644 --- a/lib/librte_lpm/Makefile +++ b/lib/librte_lpm/Makefile @@ -8,7 +8,7 @@ LIB = librte_lpm.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -LDLIBS += -lrte_eal +LDLIBS += -lrte_eal -lrte_hash EXPORT_MAP := rte_lpm_version.map diff --git a/lib/librte_lpm/meson.build b/lib/librte_lpm/meson.build index 06784942..a5176d8a 100644 --- a/lib/librte_lpm/meson.build +++ b/lib/librte_lpm/meson.build @@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') # since header files have different names, we can install all vector headers # without worrying about which architecture we actually need headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') +deps += ['hash'] diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c index 149677eb..6212003f 100644 --- a/lib/librte_lpm/rte_lpm6.c +++ b/lib/librte_lpm/rte_lpm6.c @@ -21,6 +21,9 @@ #include <rte_errno.h> #include <rte_rwlock.h> #include <rte_spinlock.h> +#include <rte_hash.h> +#include <assert.h> +#include <rte_jhash.h> #include "rte_lpm6.h" @@ -37,6 +40,9 @@ #define BYTE_SIZE 8 #define BYTES2_SIZE 16 +#define RULE_HASH_TABLE_EXTRA_SPACE 64 +#define TBL24_IND UINT32_MAX + #define 
lpm6_tbl8_gindex next_hop /** Flags for setting an entry as valid/invalid. */ @@ -70,6 +76,23 @@ struct rte_lpm6_rule { uint8_t depth; /**< Rule depth. */ }; +/** Rules tbl entry key. */ +struct rte_lpm6_rule_key { + uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */ + uint8_t depth; /**< Rule depth. */ +}; + +/* Header of tbl8 */ +struct rte_lpm_tbl8_hdr { + uint32_t owner_tbl_ind; /**< owner table: TBL24_IND if owner is tbl24, + * otherwise index of tbl8 + */ + uint32_t owner_entry_ind; /**< index of the owner table entry where + * pointer to the tbl8 is stored + */ + uint32_t ref_cnt; /**< table reference counter */ +}; + /** LPM6 structure. */ struct rte_lpm6 { /* LPM metadata. */ @@ -77,12 +100,17 @@ struct rte_lpm6 { uint32_t max_rules; /**< Max number of rules. */ uint32_t used_rules; /**< Used rules so far. */ uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ - uint32_t next_tbl8; /**< Next tbl8 to be used. */ /* LPM Tables. */ - struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */ + struct rte_hash *rules_tbl; /**< LPM rules. */ struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES] __rte_cache_aligned; /**< LPM tbl24 table. */ + + uint32_t *tbl8_pool; /**< pool of indexes of free tbl8s */ + uint32_t tbl8_pool_pos; /**< current position in the tbl8 pool */ + + struct rte_lpm_tbl8_hdr *tbl8_hdrs; /* array of tbl8 headers */ + struct rte_lpm6_tbl_entry tbl8[0] __rte_cache_aligned; /**< LPM tbl8 table. */ }; @@ -93,22 +121,122 @@ struct rte_lpm6 { * and set the rest to 0. */ static inline void -mask_ip(uint8_t *ip, uint8_t depth) +ip6_mask_addr(uint8_t *ip, uint8_t depth) { - int16_t part_depth, mask; - int i; + int16_t part_depth, mask; + int i; - part_depth = depth; + part_depth = depth; - for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) { - if (part_depth < BYTE_SIZE && part_depth >= 0) { - mask = (uint16_t)(~(UINT8_MAX >> part_depth)); - ip[i] = (uint8_t)(ip[i] & mask); - } else if (part_depth < 0) { - ip[i] = 0; - } - part_depth -= BYTE_SIZE; - } + for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) { + if (part_depth < BYTE_SIZE && part_depth >= 0) { + mask = (uint16_t)(~(UINT8_MAX >> part_depth)); + ip[i] = (uint8_t)(ip[i] & mask); + } else if (part_depth < 0) + ip[i] = 0; + + part_depth -= BYTE_SIZE; + } +} + +/* copy ipv6 address */ +static inline void +ip6_copy_addr(uint8_t *dst, const uint8_t *src) +{ + rte_memcpy(dst, src, RTE_LPM6_IPV6_ADDR_SIZE); +} + +/* + * LPM6 rule hash function + * + * It's used as a hash function for the rte_hash + * containing rules + */ +static inline uint32_t +rule_hash(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) +{ + return rte_jhash(data, sizeof(struct rte_lpm6_rule_key), init_val); +} + +/* + * Init pool of free tbl8 indexes + */ +static void +tbl8_pool_init(struct rte_lpm6 *lpm) +{ + uint32_t i; + + /* put entire range of indexes to the tbl8 pool */ + for (i = 0; i < lpm->number_tbl8s; i++) + lpm->tbl8_pool[i] = i; + + lpm->tbl8_pool_pos = 0; +} + +/* + * Get an index of a free tbl8 from the pool + */ +static inline uint32_t +tbl8_get(struct rte_lpm6 *lpm, uint32_t *tbl8_ind) +{ + if (lpm->tbl8_pool_pos == lpm->number_tbl8s) + /* no more free tbl8 */ + return -ENOSPC; + + /* next index */ + *tbl8_ind = lpm->tbl8_pool[lpm->tbl8_pool_pos++]; + return 0; +} + +/* + * Put an index of a free tbl8 back to the pool + */ +static inline uint32_t +tbl8_put(struct rte_lpm6 *lpm, uint32_t tbl8_ind) +{ + if (lpm->tbl8_pool_pos == 0) + /* pool is full */ + return -ENOSPC; + + lpm->tbl8_pool[--lpm->tbl8_pool_pos] 
= tbl8_ind; + return 0; +} + +/* + * Returns number of tbl8s available in the pool + */ +static inline uint32_t +tbl8_available(struct rte_lpm6 *lpm) +{ + return lpm->number_tbl8s - lpm->tbl8_pool_pos; +} + +/* + * Init a rule key. + * note that ip must be already masked + */ +static inline void +rule_key_init(struct rte_lpm6_rule_key *key, uint8_t *ip, uint8_t depth) +{ + ip6_copy_addr(key->ip, ip); + key->depth = depth; +} + +/* + * Rebuild the entire LPM tree by reinserting all rules + */ +static void +rebuild_lpm(struct rte_lpm6 *lpm) +{ + uint64_t next_hop; + struct rte_lpm6_rule_key *rule_key; + uint32_t iter = 0; + + while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key, + (void **) &next_hop, &iter) >= 0) + rte_lpm6_add(lpm, rule_key->ip, rule_key->depth, + (uint32_t) next_hop); } /* @@ -121,8 +249,11 @@ rte_lpm6_create(const char *name, int socket_id, char mem_name[RTE_LPM6_NAMESIZE]; struct rte_lpm6 *lpm = NULL; struct rte_tailq_entry *te; - uint64_t mem_size, rules_size; + uint64_t mem_size; struct rte_lpm6_list *lpm_list; + struct rte_hash *rules_tbl = NULL; + uint32_t *tbl8_pool = NULL; + struct rte_lpm_tbl8_hdr *tbl8_hdrs = NULL; lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list); @@ -136,12 +267,54 @@ rte_lpm6_create(const char *name, int socket_id, return NULL; } + /* create rules hash table */ + snprintf(mem_name, sizeof(mem_name), "LRH_%s", name); + struct rte_hash_parameters rule_hash_tbl_params = { + .entries = config->max_rules * 1.2 + + RULE_HASH_TABLE_EXTRA_SPACE, + .key_len = sizeof(struct rte_lpm6_rule_key), + .hash_func = rule_hash, + .hash_func_init_val = 0, + .name = mem_name, + .reserved = 0, + .socket_id = socket_id, + .extra_flag = 0 + }; + + rules_tbl = rte_hash_create(&rule_hash_tbl_params); + if (rules_tbl == NULL) { + RTE_LOG(ERR, LPM, "LPM rules hash table allocation failed: %s (%d)", + rte_strerror(rte_errno), rte_errno); + goto fail_wo_unlock; + } + + /* allocate tbl8 indexes pool */ + tbl8_pool = rte_malloc(NULL, + sizeof(uint32_t) * config->number_tbl8s, + RTE_CACHE_LINE_SIZE); + if (tbl8_pool == NULL) { + RTE_LOG(ERR, LPM, "LPM tbl8 pool allocation failed: %s (%d)", + rte_strerror(rte_errno), rte_errno); + rte_errno = ENOMEM; + goto fail_wo_unlock; + } + + /* allocate tbl8 headers */ + tbl8_hdrs = rte_malloc(NULL, + sizeof(struct rte_lpm_tbl8_hdr) * config->number_tbl8s, + RTE_CACHE_LINE_SIZE); + if (tbl8_hdrs == NULL) { + RTE_LOG(ERR, LPM, "LPM tbl8 headers allocation failed: %s (%d)", + rte_strerror(rte_errno), rte_errno); + rte_errno = ENOMEM; + goto fail_wo_unlock; + } + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); /* Determine the amount of memory to allocate. */ mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s); - rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules; rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); @@ -154,7 +327,7 @@ rte_lpm6_create(const char *name, int socket_id, lpm = NULL; if (te != NULL) { rte_errno = EEXIST; - goto exit; + goto fail; } /* allocate tailq entry */ @@ -162,7 +335,7 @@ rte_lpm6_create(const char *name, int socket_id, if (te == NULL) { RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n"); rte_errno = ENOMEM; - goto exit; + goto fail; } /* Allocate memory to store the LPM data structures. 
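/*
 * Illustrative sketch (not from this patch): a rule is now an rte_hash
 * entry whose key is {masked IP, depth} and whose data pointer carries the
 * next hop; the reworked rule_add() and rule_find_with_key() below follow
 * this pattern. Assuming a hash table created as above:
 */
#include <string.h>
#include <rte_hash.h>

static int
example_rule_set(struct rte_hash *rules, const uint8_t *masked_ip,
	uint8_t depth, uint32_t next_hop)
{
	struct rte_lpm6_rule_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.ip, masked_ip, RTE_LPM6_IPV6_ADDR_SIZE);
	key.depth = depth;
	/* inserts a new entry, or overwrites the data of an existing key */
	return rte_hash_add_key_data(rules, &key, (void *)(uintptr_t)next_hop);
}

static int
example_rule_get(struct rte_hash *rules,
	const struct rte_lpm6_rule_key *key, uint32_t *next_hop)
{
	void *data;
	int ret = rte_hash_lookup_data(rules, key, &data);

	if (ret < 0)
		return ret;	/* e.g. -ENOENT when the rule is absent */
	*next_hop = (uint32_t)(uintptr_t)data;
	return 0;
}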
*/ @@ -173,34 +346,35 @@ rte_lpm6_create(const char *name, int socket_id, RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); rte_free(te); rte_errno = ENOMEM; - goto exit; - } - - lpm->rules_tbl = rte_zmalloc_socket(NULL, - (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id); - - if (lpm->rules_tbl == NULL) { - RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n"); - rte_free(lpm); - lpm = NULL; - rte_free(te); - rte_errno = ENOMEM; - goto exit; + goto fail; } /* Save user arguments. */ lpm->max_rules = config->max_rules; lpm->number_tbl8s = config->number_tbl8s; snprintf(lpm->name, sizeof(lpm->name), "%s", name); + lpm->rules_tbl = rules_tbl; + lpm->tbl8_pool = tbl8_pool; + lpm->tbl8_hdrs = tbl8_hdrs; + + /* init the stack */ + tbl8_pool_init(lpm); te->data = (void *) lpm; TAILQ_INSERT_TAIL(lpm_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return lpm; -exit: +fail: rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - return lpm; +fail_wo_unlock: + rte_free(tbl8_hdrs); + rte_free(tbl8_pool); + rte_hash_free(rules_tbl); + + return NULL; } /* @@ -259,50 +433,88 @@ rte_lpm6_free(struct rte_lpm6 *lpm) rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); - rte_free(lpm->rules_tbl); + rte_free(lpm->tbl8_hdrs); + rte_free(lpm->tbl8_pool); + rte_hash_free(lpm->rules_tbl); rte_free(lpm); rte_free(te); } +/* Find a rule */ +static inline int +rule_find_with_key(struct rte_lpm6 *lpm, + const struct rte_lpm6_rule_key *rule_key, + uint32_t *next_hop) +{ + uint64_t hash_val; + int ret; + + /* lookup for a rule */ + ret = rte_hash_lookup_data(lpm->rules_tbl, (const void *) rule_key, + (void **) &hash_val); + if (ret >= 0) { + *next_hop = (uint32_t) hash_val; + return 1; + } + + return 0; +} + +/* Find a rule */ +static int +rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint32_t *next_hop) +{ + struct rte_lpm6_rule_key rule_key; + + /* init a rule key */ + rule_key_init(&rule_key, ip, depth); + + return rule_find_with_key(lpm, &rule_key, next_hop); +} + /* * Checks if a rule already exists in the rules table and updates * the nexthop if so. Otherwise it adds a new rule if enough space is available. + * + * Returns: + * 0 - next hop of existed rule is updated + * 1 - new rule successfully added + * <0 - error */ -static inline int32_t -rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth) +static inline int +rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop) { - uint32_t rule_index; - - /* Scan through rule list to see if rule already exists. */ - for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { + int ret, rule_exist; + struct rte_lpm6_rule_key rule_key; + uint32_t unused; - /* If rule already exists update its next_hop and return. */ - if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, - RTE_LPM6_IPV6_ADDR_SIZE) == 0) && - lpm->rules_tbl[rule_index].depth == depth) { - lpm->rules_tbl[rule_index].next_hop = next_hop; + /* init a rule key */ + rule_key_init(&rule_key, ip, depth); - return rule_index; - } - } + /* Scan through rule list to see if rule already exists. */ + rule_exist = rule_find_with_key(lpm, &rule_key, &unused); /* * If rule does not exist check if there is space to add a new rule to * this rule group. If there is no space return error. */ - if (lpm->used_rules == lpm->max_rules) { + if (!rule_exist && lpm->used_rules == lpm->max_rules) return -ENOSPC; - } - /* If there is space for the new rule add it. 
*/ - rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); - lpm->rules_tbl[rule_index].next_hop = next_hop; - lpm->rules_tbl[rule_index].depth = depth; + /* add the rule or update rules next hop */ + ret = rte_hash_add_key_data(lpm->rules_tbl, &rule_key, + (void *)(uintptr_t) next_hop); + if (ret < 0) + return ret; /* Increment the used rules counter for this rule group. */ - lpm->used_rules++; + if (!rule_exist) { + lpm->used_rules++; + return 1; + } - return rule_index; + return 0; } /* @@ -311,24 +523,24 @@ rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth) * in the IP address returns a match. */ static void -expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, - uint32_t next_hop) +expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t old_depth, + uint8_t new_depth, uint32_t next_hop, uint8_t valid) { uint32_t tbl8_group_end, tbl8_gindex_next, j; tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; struct rte_lpm6_tbl_entry new_tbl8_entry = { - .valid = VALID, - .valid_group = VALID, - .depth = depth, + .valid = valid, + .valid_group = valid, + .depth = new_depth, .next_hop = next_hop, .ext_entry = 0, }; for (j = tbl8_gindex; j < tbl8_group_end; j++) { if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0 - && lpm->tbl8[j].depth <= depth)) { + && lpm->tbl8[j].depth <= old_depth)) { lpm->tbl8[j] = new_tbl8_entry; @@ -336,37 +548,123 @@ expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; - expand_rule(lpm, tbl8_gindex_next, depth, next_hop); + expand_rule(lpm, tbl8_gindex_next, old_depth, new_depth, + next_hop, valid); } } } /* + * Init a tbl8 header + */ +static inline void +init_tbl8_header(struct rte_lpm6 *lpm, uint32_t tbl_ind, + uint32_t owner_tbl_ind, uint32_t owner_entry_ind) +{ + struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind]; + tbl_hdr->owner_tbl_ind = owner_tbl_ind; + tbl_hdr->owner_entry_ind = owner_entry_ind; + tbl_hdr->ref_cnt = 0; +} + +/* + * Calculate index to the table based on the number and position + * of the bytes being inspected in this step. + */ +static uint32_t +get_bitshift(const uint8_t *ip, uint8_t first_byte, uint8_t bytes) +{ + uint32_t entry_ind, i; + int8_t bitshift; + + entry_ind = 0; + for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) { + bitshift = (int8_t)((bytes - i)*BYTE_SIZE); + + if (bitshift < 0) + bitshift = 0; + entry_ind = entry_ind | ip[i-1] << bitshift; + } + + return entry_ind; +} + +/* + * Simulate adding a new route to the LPM counting number + * of new tables that will be needed + * + * It returns 0 on success, or 1 if + * the process needs to be continued by calling the function again. + */ +static inline int +simulate_add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + struct rte_lpm6_tbl_entry **next_tbl, const uint8_t *ip, + uint8_t bytes, uint8_t first_byte, uint8_t depth, + uint32_t *need_tbl_nb) +{ + uint32_t entry_ind; + uint8_t bits_covered; + uint32_t next_tbl_ind; + + /* + * Calculate index to the table based on the number and position + * of the bytes being inspected in this step. 
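/*
 * Illustrative note (not from this patch): for the first step
 * (first_byte = 1, bytes = ADD_FIRST_BYTE, i.e. the three bytes that the
 * surrounding comments say are inspected through tbl24) get_bitshift()
 * packs the first three address bytes into the tbl24 index; for every
 * later step (bytes = 1) it reduces to the single byte being inspected:
 */
static void
example_get_bitshift(const uint8_t *ip)
{
	uint32_t tbl24_ind = get_bitshift(ip, 1, 3);
	/* same as (ip[0] << 16) | (ip[1] << 8) | ip[2] */

	uint32_t byte4_ind = get_bitshift(ip, 4, 1);
	/* same as ip[3], the fourth byte of the address */

	(void)tbl24_ind;
	(void)byte4_ind;
}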
+ */ + entry_ind = get_bitshift(ip, first_byte, bytes); + + /* Number of bits covered in this step */ + bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE); + + if (depth <= bits_covered) { + *need_tbl_nb = 0; + return 0; + } + + if (tbl[entry_ind].valid == 0 || tbl[entry_ind].ext_entry == 0) { + /* from this point on a new table is needed on each level + * that is not covered yet + */ + depth -= bits_covered; + uint32_t cnt = depth >> 3; /* depth / BYTE_SIZE */ + if (depth & 7) /* 0b00000111 */ + /* if depth % 8 > 0 then one more table is needed + * for those last bits + */ + cnt++; + + *need_tbl_nb = cnt; + return 0; + } + + next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex; + *next_tbl = &(lpm->tbl8[next_tbl_ind * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]); + *need_tbl_nb = 0; + return 1; +} + +/* * Partially adds a new route to the data structure (tbl24+tbl8s). * It returns 0 on success, a negative number on failure, or 1 if * the process needs to be continued by calling the function again. */ static inline int add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, - struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes, - uint8_t first_byte, uint8_t depth, uint32_t next_hop) + uint32_t tbl_ind, struct rte_lpm6_tbl_entry **next_tbl, + uint32_t *next_tbl_ind, uint8_t *ip, uint8_t bytes, + uint8_t first_byte, uint8_t depth, uint32_t next_hop, + uint8_t is_new_rule) { - uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i; - int32_t tbl8_gindex; - int8_t bitshift; + uint32_t entry_ind, tbl_range, tbl8_group_start, tbl8_group_end, i; + uint32_t tbl8_gindex; uint8_t bits_covered; + int ret; /* * Calculate index to the table based on the number and position * of the bytes being inspected in this step. */ - tbl_index = 0; - for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) { - bitshift = (int8_t)((bytes - i)*BYTE_SIZE); - - if (bitshift < 0) bitshift = 0; - tbl_index = tbl_index | ip[i-1] << bitshift; - } + entry_ind = get_bitshift(ip, first_byte, bytes); /* Number of bits covered in this step */ bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE); @@ -378,7 +676,7 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, if (depth <= bits_covered) { tbl_range = 1 << (bits_covered - depth); - for (i = tbl_index; i < (tbl_index + tbl_range); i++) { + for (i = entry_ind; i < (entry_ind + tbl_range); i++) { if (!tbl[i].valid || (tbl[i].ext_entry == 0 && tbl[i].depth <= depth)) { @@ -400,10 +698,15 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, */ tbl8_gindex = tbl[i].lpm6_tbl8_gindex * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; - expand_rule(lpm, tbl8_gindex, depth, next_hop); + expand_rule(lpm, tbl8_gindex, depth, depth, + next_hop, VALID); } } + /* update tbl8 rule reference counter */ + if (tbl_ind != TBL24_IND && is_new_rule) + lpm->tbl8_hdrs[tbl_ind].ref_cnt++; + return 0; } /* @@ -412,12 +715,24 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, */ else { /* If it's invalid a new tbl8 is needed */ - if (!tbl[tbl_index].valid) { - if (lpm->next_tbl8 < lpm->number_tbl8s) - tbl8_gindex = (lpm->next_tbl8)++; - else + if (!tbl[entry_ind].valid) { + /* get a new table */ + ret = tbl8_get(lpm, &tbl8_gindex); + if (ret != 0) return -ENOSPC; + /* invalidate all new tbl8 entries */ + tbl8_group_start = tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + memset(&lpm->tbl8[tbl8_group_start], 0, + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES); + + /* init the new table's header: + * save the reference to the owner table + */ + 
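/*
 * Illustrative worked example (not from this patch) for the counting done
 * in simulate_add_step(): adding a /64 prefix whose tbl24 entry is still
 * empty. tbl24 covers the first 24 bits, so 64 - 24 = 40 bits remain;
 * 40 >> 3 = 5 and 40 & 7 == 0, hence five new tbl8s are needed, one per
 * remaining byte. A /50 prefix under the same conditions needs
 * 26 >> 3 = 3 plus one more for the trailing 2 bits, i.e. four tbl8s.
 */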
init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind); + + /* reference to a new tbl8 */ struct rte_lpm6_tbl_entry new_tbl_entry = { .lpm6_tbl8_gindex = tbl8_gindex, .depth = 0, @@ -426,17 +741,20 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, .ext_entry = 1, }; - tbl[tbl_index] = new_tbl_entry; + tbl[entry_ind] = new_tbl_entry; + + /* update the current table's reference counter */ + if (tbl_ind != TBL24_IND) + lpm->tbl8_hdrs[tbl_ind].ref_cnt++; } /* - * If it's valid but not extended the rule that was stored * + * If it's valid but not extended the rule that was stored * here needs to be moved to the next table. */ - else if (tbl[tbl_index].ext_entry == 0) { - /* Search for free tbl8 group. */ - if (lpm->next_tbl8 < lpm->number_tbl8s) - tbl8_gindex = (lpm->next_tbl8)++; - else + else if (tbl[entry_ind].ext_entry == 0) { + /* get a new tbl8 index */ + ret = tbl8_get(lpm, &tbl8_gindex); + if (ret != 0) return -ENOSPC; tbl8_group_start = tbl8_gindex * @@ -444,13 +762,22 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, tbl8_group_end = tbl8_group_start + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + struct rte_lpm6_tbl_entry tbl_entry = { + .next_hop = tbl[entry_ind].next_hop, + .depth = tbl[entry_ind].depth, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 0 + }; + /* Populate new tbl8 with tbl value. */ - for (i = tbl8_group_start; i < tbl8_group_end; i++) { - lpm->tbl8[i].valid = VALID; - lpm->tbl8[i].depth = tbl[tbl_index].depth; - lpm->tbl8[i].next_hop = tbl[tbl_index].next_hop; - lpm->tbl8[i].ext_entry = 0; - } + for (i = tbl8_group_start; i < tbl8_group_end; i++) + lpm->tbl8[i] = tbl_entry; + + /* init the new table's header: + * save the reference to the owner table + */ + init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind); /* * Update tbl entry to point to new tbl8 entry. Note: The @@ -465,11 +792,16 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, .ext_entry = 1, }; - tbl[tbl_index] = new_tbl_entry; + tbl[entry_ind] = new_tbl_entry; + + /* update the current table's reference counter */ + if (tbl_ind != TBL24_IND) + lpm->tbl8_hdrs[tbl_ind].ref_cnt++; } - *tbl_next = &(lpm->tbl8[tbl[tbl_index].lpm6_tbl8_gindex * - RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]); + *next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex; + *next_tbl = &(lpm->tbl8[*next_tbl_ind * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]); } return 1; @@ -486,13 +818,56 @@ rte_lpm6_add_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, } VERSION_SYMBOL(rte_lpm6_add, _v20, 2.0); + +/* + * Simulate adding a route to LPM + * + * Returns: + * 0 on success + * -ENOSPC not enought tbl8 left + */ +static int +simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth) +{ + struct rte_lpm6_tbl_entry *tbl; + struct rte_lpm6_tbl_entry *tbl_next = NULL; + int ret, i; + + /* number of new tables needed for a step */ + uint32_t need_tbl_nb; + /* total number of new tables needed */ + uint32_t total_need_tbl_nb; + + /* Inspect the first three bytes through tbl24 on the first step. */ + ret = simulate_add_step(lpm, lpm->tbl24, &tbl_next, masked_ip, + ADD_FIRST_BYTE, 1, depth, &need_tbl_nb); + total_need_tbl_nb = need_tbl_nb; + /* + * Inspect one by one the rest of the bytes until + * the process is completed. 
+ */ + for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && ret == 1; i++) { + tbl = tbl_next; + ret = simulate_add_step(lpm, tbl, &tbl_next, masked_ip, 1, + (uint8_t)(i+1), depth, &need_tbl_nb); + total_need_tbl_nb += need_tbl_nb; + } + + if (tbl8_available(lpm) < total_need_tbl_nb) + /* not enought tbl8 to add a rule */ + return -ENOSPC; + + return 0; +} + int rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop) { struct rte_lpm6_tbl_entry *tbl; struct rte_lpm6_tbl_entry *tbl_next = NULL; - int32_t rule_index; + /* init to avoid compiler warning */ + uint32_t tbl_next_num = 123456; int status; uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; int i; @@ -502,26 +877,26 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, return -EINVAL; /* Copy the IP and mask it to avoid modifying user's input data. */ - memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); - mask_ip(masked_ip, depth); + ip6_copy_addr(masked_ip, ip); + ip6_mask_addr(masked_ip, depth); - /* Add the rule to the rule table. */ - rule_index = rule_add(lpm, masked_ip, next_hop, depth); + /* Simulate adding a new route */ + int ret = simulate_add(lpm, masked_ip, depth); + if (ret < 0) + return ret; + /* Add the rule to the rule table. */ + int is_new_rule = rule_add(lpm, masked_ip, depth, next_hop); /* If there is no space available for new rule return error. */ - if (rule_index < 0) { - return rule_index; - } + if (is_new_rule < 0) + return is_new_rule; /* Inspect the first three bytes through tbl24 on the first step. */ tbl = lpm->tbl24; - status = add_step (lpm, tbl, &tbl_next, masked_ip, ADD_FIRST_BYTE, 1, - depth, next_hop); - if (status < 0) { - rte_lpm6_delete(lpm, masked_ip, depth); - - return status; - } + status = add_step(lpm, tbl, TBL24_IND, &tbl_next, &tbl_next_num, + masked_ip, ADD_FIRST_BYTE, 1, depth, next_hop, + is_new_rule); + assert(status >= 0); /* * Inspect one by one the rest of the bytes until @@ -529,13 +904,10 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, */ for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) { tbl = tbl_next; - status = add_step (lpm, tbl, &tbl_next, masked_ip, 1, (uint8_t)(i+1), - depth, next_hop); - if (status < 0) { - rte_lpm6_delete(lpm, masked_ip, depth); - - return status; - } + status = add_step(lpm, tbl, tbl_next_num, &tbl_next, + &tbl_next_num, masked_ip, 1, (uint8_t)(i+1), + depth, next_hop, is_new_rule); + assert(status >= 0); } return status; @@ -610,9 +982,8 @@ rte_lpm6_lookup_v1705(const struct rte_lpm6 *lpm, uint8_t *ip, uint32_t tbl24_index; /* DEBUG: Check user input arguments. */ - if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) { + if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) return -EINVAL; - } first_byte = LOOKUP_FIRST_BYTE; tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2]; @@ -648,9 +1019,8 @@ rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm, int status; /* DEBUG: Check user input arguments. */ - if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) { + if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) return -EINVAL; - } for (i = 0; i < n; i++) { first_byte = LOOKUP_FIRST_BYTE; @@ -725,30 +1095,6 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, rte_lpm6_lookup_bulk_func_v1705); /* - * Finds a rule in rule table. - * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. 
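/*
 * Illustrative note (not from this patch): rte_lpm6_add() is now a
 * two-phase operation. simulate_add() first verifies that enough free
 * tbl8s exist for the whole prefix; only then is the rule stored and the
 * tree modified. That is why the add_step() loop above asserts on its
 * status instead of rolling back with rte_lpm6_delete() as the old code
 * did: once the simulation succeeds, tbl8_get() cannot run out of tables
 * for this prefix.
 */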
- */ -static inline int32_t -rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) -{ - uint32_t rule_index; - - /* Scan used rules at given depth to find rule. */ - for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { - /* If rule is found return the rule index. */ - if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, - RTE_LPM6_IPV6_ADDR_SIZE) == 0) && - lpm->rules_tbl[rule_index].depth == depth) { - - return rule_index; - } - } - - /* If rule is not found return -ENOENT. */ - return -ENOENT; -} - -/* * Look for a rule in the high-level rules table */ int @@ -775,8 +1121,7 @@ int rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t *next_hop) { - uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; - int32_t rule_index; + uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; /* Check user arguments. */ if ((lpm == NULL) || next_hop == NULL || ip == NULL || @@ -784,19 +1129,10 @@ rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, return -EINVAL; /* Copy the IP and mask it to avoid modifying user's input data. */ - memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); - mask_ip(ip_masked, depth); - - /* Look for the rule using rule_find. */ - rule_index = rule_find(lpm, ip_masked, depth); - - if (rule_index >= 0) { - *next_hop = lpm->rules_tbl[rule_index].next_hop; - return 1; - } + ip6_copy_addr(masked_ip, ip); + ip6_mask_addr(masked_ip, depth); - /* If rule is not found return 0. */ - return 0; + return rule_find(lpm, masked_ip, depth, next_hop); } BIND_DEFAULT_SYMBOL(rte_lpm6_is_rule_present, _v1705, 17.05); MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, @@ -806,133 +1142,66 @@ MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, /* * Delete a rule from the rule table. * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. + * return + * 0 on success + * <0 on failure */ -static inline void -rule_delete(struct rte_lpm6 *lpm, int32_t rule_index) -{ - /* - * Overwrite redundant rule with last rule in group and decrement rule - * counter. - */ - lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1]; - lpm->used_rules--; -} - -/* - * Deletes a rule - */ -int -rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +static inline int +rule_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) { - int32_t rule_to_delete_index; - uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; - unsigned i; - - /* - * Check input arguments. - */ - if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) { - return -EINVAL; - } - - /* Copy the IP and mask it to avoid modifying user's input data. */ - memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); - mask_ip(ip_masked, depth); - - /* - * Find the index of the input rule, that needs to be deleted, in the - * rule table. - */ - rule_to_delete_index = rule_find(lpm, ip_masked, depth); - - /* - * Check if rule_to_delete_index was found. If no rule was found the - * function rule_find returns -ENOENT. - */ - if (rule_to_delete_index < 0) - return rule_to_delete_index; - - /* Delete the rule from the rule table. */ - rule_delete(lpm, rule_to_delete_index); + int ret; + struct rte_lpm6_rule_key rule_key; - /* - * Set all the table entries to 0 (ie delete every rule - * from the data structure. 
- */ - lpm->next_tbl8 = 0; - memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); - memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) - * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + /* init rule key */ + rule_key_init(&rule_key, ip, depth); - /* - * Add every rule again (except for the one that was removed from - * the rules table). - */ - for (i = 0; i < lpm->used_rules; i++) { - rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, - lpm->rules_tbl[i].next_hop); - } + /* delete the rule */ + ret = rte_hash_del_key(lpm->rules_tbl, (void *) &rule_key); + if (ret >= 0) + lpm->used_rules--; - return 0; + return ret; } /* * Deletes a group of rules + * + * Note that the function rebuilds the lpm table, + * rather than doing incremental updates like + * the regular delete function */ int rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm, - uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n) + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, + unsigned n) { - int32_t rule_to_delete_index; - uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; unsigned i; - /* - * Check input arguments. - */ - if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) { + /* Check input arguments. */ + if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) return -EINVAL; - } for (i = 0; i < n; i++) { - /* Copy the IP and mask it to avoid modifying user's input data. */ - memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE); - mask_ip(ip_masked, depths[i]); - - /* - * Find the index of the input rule, that needs to be deleted, in the - * rule table. - */ - rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]); - - /* - * Check if rule_to_delete_index was found. If no rule was found the - * function rule_find returns -ENOENT. - */ - if (rule_to_delete_index < 0) - continue; - - /* Delete the rule from the rule table. */ - rule_delete(lpm, rule_to_delete_index); + ip6_copy_addr(masked_ip, ips[i]); + ip6_mask_addr(masked_ip, depths[i]); + rule_delete(lpm, masked_ip, depths[i]); } /* * Set all the table entries to 0 (ie delete every rule * from the data structure. */ - lpm->next_tbl8 = 0; memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + tbl8_pool_init(lpm); /* * Add every rule again (except for the ones that were removed from * the rules table). */ - for (i = 0; i < lpm->used_rules; i++) { - rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, - lpm->rules_tbl[i].next_hop); - } + rebuild_lpm(lpm); return 0; } @@ -946,9 +1215,6 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm) /* Zero used rules counter. */ lpm->used_rules = 0; - /* Zero next tbl8 index. */ - lpm->next_tbl8 = 0; - /* Zero tbl24. */ memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); @@ -956,6 +1222,268 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm) memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + /* init pool of free tbl8 indexes */ + tbl8_pool_init(lpm); + /* Delete all rules form the rules table. 
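/*
 * Illustrative sketch (not from this patch): unlike the incremental
 * rte_lpm6_delete() further below, the bulk variant still drops the rules
 * from the hash, wipes tbl24/tbl8, re-inits the tbl8 pool and replays the
 * surviving rules through rebuild_lpm(), so one call is enough however
 * many prefixes are removed:
 */
#include <rte_lpm6.h>

static void
example_bulk_delete(struct rte_lpm6 *lpm,
	uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths,
	unsigned int n)
{
	/* removes all n prefixes and rebuilds the tables once */
	rte_lpm6_delete_bulk_func(lpm, ips, depths, n);
}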
*/ - memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules); + rte_hash_reset(lpm->rules_tbl); +} + +/* + * Convert a depth to a one byte long mask + * Example: 4 will be converted to 0xF0 + */ +static uint8_t __attribute__((pure)) +depth_to_mask_1b(uint8_t depth) +{ + /* To calculate a mask start with a 1 on the left hand side and right + * shift while populating the left hand side with 1's + */ + return (signed char)0x80 >> (depth - 1); +} + +/* + * Find a less specific rule + */ +static int +rule_find_less_specific(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + struct rte_lpm6_rule *rule) +{ + int ret; + uint32_t next_hop; + uint8_t mask; + struct rte_lpm6_rule_key rule_key; + + if (depth == 1) + return 0; + + rule_key_init(&rule_key, ip, depth); + + while (depth > 1) { + depth--; + + /* each iteration zero one more bit of the key */ + mask = depth & 7; /* depth % BYTE_SIZE */ + if (mask > 0) + mask = depth_to_mask_1b(mask); + + rule_key.depth = depth; + rule_key.ip[depth >> 3] &= mask; + + ret = rule_find_with_key(lpm, &rule_key, &next_hop); + if (ret) { + rule->depth = depth; + ip6_copy_addr(rule->ip, rule_key.ip); + rule->next_hop = next_hop; + return 1; + } + } + + return 0; +} + +/* + * Find range of tbl8 cells occupied by a rule + */ +static void +rule_find_range(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth, + struct rte_lpm6_tbl_entry **from, + struct rte_lpm6_tbl_entry **to, + uint32_t *out_tbl_ind) +{ + uint32_t ind; + uint32_t first_3bytes = (uint32_t)ip[0] << 16 | ip[1] << 8 | ip[2]; + + if (depth <= 24) { + /* rule is within the top level */ + ind = first_3bytes; + *from = &lpm->tbl24[ind]; + ind += (1 << (24 - depth)) - 1; + *to = &lpm->tbl24[ind]; + *out_tbl_ind = TBL24_IND; + } else { + /* top level entry */ + struct rte_lpm6_tbl_entry *tbl = &lpm->tbl24[first_3bytes]; + assert(tbl->ext_entry == 1); + /* first tbl8 */ + uint32_t tbl_ind = tbl->lpm6_tbl8_gindex; + tbl = &lpm->tbl8[tbl_ind * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]; + /* current ip byte, the top level is already behind */ + uint8_t byte = 3; + /* minus top level */ + depth -= 24; + + /* interate through levels (tbl8s) + * until we reach the last one + */ + while (depth > 8) { + tbl += ip[byte]; + assert(tbl->ext_entry == 1); + /* go to the next level/tbl8 */ + tbl_ind = tbl->lpm6_tbl8_gindex; + tbl = &lpm->tbl8[tbl_ind * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]; + byte += 1; + depth -= 8; + } + + /* last level/tbl8 */ + ind = ip[byte] & depth_to_mask_1b(depth); + *from = &tbl[ind]; + ind += (1 << (8 - depth)) - 1; + *to = &tbl[ind]; + *out_tbl_ind = tbl_ind; + } +} + +/* + * Remove a table from the LPM tree + */ +static void +remove_tbl(struct rte_lpm6 *lpm, struct rte_lpm_tbl8_hdr *tbl_hdr, + uint32_t tbl_ind, struct rte_lpm6_rule *lsp_rule) +{ + struct rte_lpm6_tbl_entry *owner_entry; + + if (tbl_hdr->owner_tbl_ind == TBL24_IND) + owner_entry = &lpm->tbl24[tbl_hdr->owner_entry_ind]; + else { + uint32_t owner_tbl_ind = tbl_hdr->owner_tbl_ind; + owner_entry = &lpm->tbl8[ + owner_tbl_ind * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES + + tbl_hdr->owner_entry_ind]; + + struct rte_lpm_tbl8_hdr *owner_tbl_hdr = + &lpm->tbl8_hdrs[owner_tbl_ind]; + if (--owner_tbl_hdr->ref_cnt == 0) + remove_tbl(lpm, owner_tbl_hdr, owner_tbl_ind, lsp_rule); + } + + assert(owner_entry->ext_entry == 1); + + /* unlink the table */ + if (lsp_rule != NULL) { + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = lsp_rule->next_hop, + .depth = lsp_rule->depth, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 0 + }; 
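/*
 * Illustrative worked values (not from this patch) for depth_to_mask_1b():
 *
 *	depth_to_mask_1b(1) == 0x80	(1000 0000)
 *	depth_to_mask_1b(4) == 0xF0	(1111 0000)
 *	depth_to_mask_1b(8) == 0xFF	(1111 1111)
 *
 * rule_find_less_specific() shortens the prefix one bit per iteration and
 * re-masks the byte containing the new prefix boundary with this value
 * (zeroing it entirely when the boundary lands on a byte edge) before
 * probing the rules hash again, so only less specific candidates are
 * looked up.
 */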
+ + *owner_entry = new_tbl_entry; + } else { + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = 0, + .depth = 0, + .valid = INVALID, + .valid_group = INVALID, + .ext_entry = 0 + }; + + *owner_entry = new_tbl_entry; + } + + /* return the table to the pool */ + tbl8_put(lpm, tbl_ind); +} + +/* + * Deletes a rule + */ +int +rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +{ + uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; + struct rte_lpm6_rule lsp_rule_obj; + struct rte_lpm6_rule *lsp_rule; + int ret; + uint32_t tbl_ind; + struct rte_lpm6_tbl_entry *from, *to; + + /* Check input arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) + return -EINVAL; + + /* Copy the IP and mask it to avoid modifying user's input data. */ + ip6_copy_addr(masked_ip, ip); + ip6_mask_addr(masked_ip, depth); + + /* Delete the rule from the rule table. */ + ret = rule_delete(lpm, masked_ip, depth); + if (ret < 0) + return -ENOENT; + + /* find rule cells */ + rule_find_range(lpm, masked_ip, depth, &from, &to, &tbl_ind); + + /* find a less specific rule (a rule with smaller depth) + * note: masked_ip will be modified, don't use it anymore + */ + ret = rule_find_less_specific(lpm, masked_ip, depth, + &lsp_rule_obj); + lsp_rule = ret ? &lsp_rule_obj : NULL; + + /* decrement the table rule counter, + * note that tbl24 doesn't have a header + */ + if (tbl_ind != TBL24_IND) { + struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind]; + if (--tbl_hdr->ref_cnt == 0) { + /* remove the table */ + remove_tbl(lpm, tbl_hdr, tbl_ind, lsp_rule); + return 0; + } + } + + /* iterate rule cells */ + for (; from <= to; from++) + if (from->ext_entry == 1) { + /* reference to a more specific space + * of the prefix/rule. Entries in a more + * specific space that are not used by + * a more specific prefix must be occupied + * by the prefix + */ + if (lsp_rule != NULL) + expand_rule(lpm, + from->lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES, + depth, lsp_rule->depth, + lsp_rule->next_hop, VALID); + else + /* since the prefix has no less specific prefix, + * its more specific space must be invalidated + */ + expand_rule(lpm, + from->lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES, + depth, 0, 0, INVALID); + } else if (from->depth == depth) { + /* entry is not a reference and belongs to the prefix */ + if (lsp_rule != NULL) { + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = lsp_rule->next_hop, + .depth = lsp_rule->depth, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 0 + }; + + *from = new_tbl_entry; + } else { + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = 0, + .depth = 0, + .valid = INVALID, + .valid_group = INVALID, + .ext_entry = 0 + }; + + *from = new_tbl_entry; + } + } + + return 0; } diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build index 45ffb0db..94d9c4c9 100644 --- a/lib/librte_mbuf/meson.build +++ b/lib/librte_mbuf/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -version = 3 +version = 4 sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c') headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h') deps += ['mempool'] diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c index e714c5a5..9790b4fb 100644 --- a/lib/librte_mbuf/rte_mbuf.c +++ b/lib/librte_mbuf/rte_mbuf.c @@ -296,11 +296,19 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask) case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED"; case 
PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; + case PKT_RX_FDIR_ID: return "PKT_RX_FDIR_ID"; + case PKT_RX_FDIR_FLX: return "PKT_RX_FDIR_FLX"; case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED"; + case PKT_RX_QINQ: return "PKT_RX_QINQ"; case PKT_RX_LRO: return "PKT_RX_LRO"; case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP"; case PKT_RX_SEC_OFFLOAD: return "PKT_RX_SEC_OFFLOAD"; case PKT_RX_SEC_OFFLOAD_FAILED: return "PKT_RX_SEC_OFFLOAD_FAILED"; + case PKT_RX_OUTER_L4_CKSUM_BAD: return "PKT_RX_OUTER_L4_CKSUM_BAD"; + case PKT_RX_OUTER_L4_CKSUM_GOOD: return "PKT_RX_OUTER_L4_CKSUM_GOOD"; + case PKT_RX_OUTER_L4_CKSUM_INVALID: + return "PKT_RX_OUTER_L4_CKSUM_INVALID"; + default: return NULL; } } @@ -333,12 +341,21 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL }, { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL }, { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL }, + { PKT_RX_FDIR_ID, PKT_RX_FDIR_ID, NULL }, + { PKT_RX_FDIR_FLX, PKT_RX_FDIR_FLX, NULL }, { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL }, { PKT_RX_LRO, PKT_RX_LRO, NULL }, { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL }, { PKT_RX_SEC_OFFLOAD, PKT_RX_SEC_OFFLOAD, NULL }, { PKT_RX_SEC_OFFLOAD_FAILED, PKT_RX_SEC_OFFLOAD_FAILED, NULL }, { PKT_RX_QINQ, PKT_RX_QINQ, NULL }, + { PKT_RX_OUTER_L4_CKSUM_BAD, PKT_RX_OUTER_L4_CKSUM_MASK, NULL }, + { PKT_RX_OUTER_L4_CKSUM_GOOD, PKT_RX_OUTER_L4_CKSUM_MASK, + NULL }, + { PKT_RX_OUTER_L4_CKSUM_INVALID, PKT_RX_OUTER_L4_CKSUM_MASK, + NULL }, + { PKT_RX_OUTER_L4_CKSUM_UNKNOWN, PKT_RX_OUTER_L4_CKSUM_MASK, + "PKT_RX_OUTER_L4_CKSUM_UNKNOWN" }, }; const char *name; unsigned int i; @@ -373,7 +390,7 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) const char *rte_get_tx_ol_flag_name(uint64_t mask) { switch (mask) { - case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT"; + case PKT_TX_VLAN: return "PKT_TX_VLAN"; case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM"; case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM"; case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM"; @@ -393,8 +410,12 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask) case PKT_TX_TUNNEL_VXLAN_GPE: return "PKT_TX_TUNNEL_VXLAN_GPE"; case PKT_TX_TUNNEL_IP: return "PKT_TX_TUNNEL_IP"; case PKT_TX_TUNNEL_UDP: return "PKT_TX_TUNNEL_UDP"; + case PKT_TX_QINQ: return "PKT_TX_QINQ"; case PKT_TX_MACSEC: return "PKT_TX_MACSEC"; case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD"; + case PKT_TX_UDP_SEG: return "PKT_TX_UDP_SEG"; + case PKT_TX_OUTER_UDP_CKSUM: return "PKT_TX_OUTER_UDP_CKSUM"; + case PKT_TX_METADATA: return "PKT_TX_METADATA"; default: return NULL; } } @@ -404,7 +425,7 @@ int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { const struct flag_mask tx_flags[] = { - { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL }, + { PKT_TX_VLAN, PKT_TX_VLAN, NULL }, { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL }, { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL }, { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL }, @@ -417,24 +438,20 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen) { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL }, { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL }, { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL }, - { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, - 
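/*
 * Illustrative sketch (not from this patch): with the new table entries
 * the textual dump of Rx offload flags also reports the outer L4 checksum
 * status, e.g.:
 */
#include <stdio.h>
#include <rte_mbuf.h>

static void
example_dump_rx_flags(const struct rte_mbuf *m)
{
	char buf[256];

	if (rte_get_rx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
		printf("rx ol_flags: %s\n", buf);
}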
"PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, - { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK, - "PKT_TX_TUNNEL_NONE" }, + { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK, NULL }, + { PKT_TX_QINQ, PKT_TX_QINQ, NULL }, { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL }, { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL }, + { PKT_TX_UDP_SEG, PKT_TX_UDP_SEG, NULL }, + { PKT_TX_OUTER_UDP_CKSUM, PKT_TX_OUTER_UDP_CKSUM, NULL }, + { PKT_TX_METADATA, PKT_TX_METADATA, NULL }, }; const char *name; unsigned int i; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 9ce5d76d..3dbc6695 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -140,7 +140,7 @@ extern "C" { * The 2 vlans have been stripped by the hardware and their tci are * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer). * This can only happen if vlan stripping is enabled in the RX - * configuration of the PMD. If this flag is set, + * configuration of the PMD. * When PKT_RX_QINQ_STRIPPED is set, the flags (PKT_RX_VLAN | * PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ) must also be set. */ @@ -170,18 +170,54 @@ extern "C" { /** * The RX packet is a double VLAN, and the outer tci has been - * saved in in mbuf->vlan_tci_outer. + * saved in in mbuf->vlan_tci_outer. If PKT_RX_QINQ set, PKT_RX_VLAN + * also should be set and inner tci should be saved to mbuf->vlan_tci. * If the flag PKT_RX_QINQ_STRIPPED is also present, both VLANs * headers have been stripped from mbuf data, else they are still * present. */ #define PKT_RX_QINQ (1ULL << 20) +/** + * Mask of bits used to determine the status of outer RX L4 checksum. + * - PKT_RX_OUTER_L4_CKSUM_UNKNOWN: no info about the outer RX L4 checksum + * - PKT_RX_OUTER_L4_CKSUM_BAD: the outer L4 checksum in the packet is wrong + * - PKT_RX_OUTER_L4_CKSUM_GOOD: the outer L4 checksum in the packet is valid + * - PKT_RX_OUTER_L4_CKSUM_INVALID: invalid outer L4 checksum state. + * + * The detection of PKT_RX_OUTER_L4_CKSUM_GOOD shall be based on the given + * HW capability, At minimum, the PMD should support + * PKT_RX_OUTER_L4_CKSUM_UNKNOWN and PKT_RX_OUTER_L4_CKSUM_BAD states + * if the DEV_RX_OFFLOAD_OUTER_UDP_CKSUM offload is available. + */ +#define PKT_RX_OUTER_L4_CKSUM_MASK ((1ULL << 21) | (1ULL << 22)) + +#define PKT_RX_OUTER_L4_CKSUM_UNKNOWN 0 +#define PKT_RX_OUTER_L4_CKSUM_BAD (1ULL << 21) +#define PKT_RX_OUTER_L4_CKSUM_GOOD (1ULL << 22) +#define PKT_RX_OUTER_L4_CKSUM_INVALID ((1ULL << 21) | (1ULL << 22)) + /* add new RX flags here */ /* add new TX flags here */ /** + * Indicate that the metadata field in the mbuf is in use. + */ +#define PKT_TX_METADATA (1ULL << 40) + +/** + * Outer UDP checksum offload flag. This flag is used for enabling + * outer UDP checksum in PMD. To use outer UDP checksum, the user needs to + * 1) Enable the following in mbuff, + * a) Fill outer_l2_len and outer_l3_len in mbuf. + * b) Set the PKT_TX_OUTER_UDP_CKSUM flag. + * c) Set the PKT_TX_OUTER_IPV4 or PKT_TX_OUTER_IPV6 flag. 
+ * 2) Configure DEV_TX_OFFLOAD_OUTER_UDP_CKSUM offload flag. + */ +#define PKT_TX_OUTER_UDP_CKSUM (1ULL << 41) + +/** * UDP Fragmentation Offload flag. This flag is used for enabling UDP * fragmentation in SW or in HW. When use UFO, mbuf->tso_segsz is used * to store the MSS of UDP fragments. @@ -334,16 +370,23 @@ extern "C" { * which can be set for packet. */ #define PKT_TX_OFFLOAD_MASK ( \ + PKT_TX_OUTER_IPV6 | \ + PKT_TX_OUTER_IPV4 | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ - PKT_TX_OUTER_IP_CKSUM | \ - PKT_TX_TCP_SEG | \ PKT_TX_IEEE1588_TMST | \ + PKT_TX_TCP_SEG | \ PKT_TX_QINQ_PKT | \ - PKT_TX_VLAN_PKT | \ PKT_TX_TUNNEL_MASK | \ PKT_TX_MACSEC | \ - PKT_TX_SEC_OFFLOAD) + PKT_TX_SEC_OFFLOAD | \ + PKT_TX_UDP_SEG | \ + PKT_TX_OUTER_UDP_CKSUM | \ + PKT_TX_METADATA) /** * Mbuf having an external buffer attached. shinfo in mbuf must be filled. @@ -464,7 +507,9 @@ struct rte_mbuf { }; uint16_t nb_segs; /**< Number of segments. */ - /** Input port (16 bits to support more than 256 virtual ports). */ + /** Input port (16 bits to support more than 256 virtual ports). + * The event eth Tx adapter uses this field to specify the output port. + */ uint16_t port; uint64_t ol_flags; /**< Offload features. */ @@ -511,28 +556,47 @@ struct rte_mbuf { /** VLAN TCI (CPU order), valid if PKT_RX_VLAN is set. */ uint16_t vlan_tci; + RTE_STD_C11 union { - uint32_t rss; /**< RSS hash result if RSS enabled */ - struct { - RTE_STD_C11 - union { - struct { - uint16_t hash; - uint16_t id; + union { + uint32_t rss; /**< RSS hash result if RSS enabled */ + struct { + union { + struct { + uint16_t hash; + uint16_t id; + }; + uint32_t lo; + /**< Second 4 flexible bytes */ }; + uint32_t hi; + /**< First 4 flexible bytes or FD ID, dependent + * on PKT_RX_FDIR_* flag in ol_flags. + */ + } fdir; /**< Filter identifier if FDIR enabled */ + struct { uint32_t lo; - /**< Second 4 flexible bytes */ - }; - uint32_t hi; - /**< First 4 flexible bytes or FD ID, dependent on - PKT_RX_FDIR_* flag in ol_flags. */ - } fdir; /**< Filter identifier if FDIR enabled */ + uint32_t hi; + /**< The event eth Tx adapter uses this field + * to store Tx queue id. + * @see rte_event_eth_tx_adapter_txq_set() + */ + } sched; /**< Hierarchical scheduler */ + /**< User defined tags. See rte_distributor_process() */ + uint32_t usr; + } hash; /**< hash information */ struct { - uint32_t lo; - uint32_t hi; - } sched; /**< Hierarchical scheduler */ - uint32_t usr; /**< User defined tags. See rte_distributor_process() */ - } hash; /**< hash information */ + /** + * Application specific metadata value + * for egress flow rule match. + * Valid if PKT_TX_METADATA is set. + * Located here to allow conjunct use + * with hash.sched.hi. + */ + uint32_t tx_metadata; + uint32_t reserved; + }; + }; /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ is set. */ uint16_t vlan_tci_outer; @@ -1038,14 +1102,6 @@ rte_mbuf_raw_free(struct rte_mbuf *m) rte_mempool_put(m->pool, m); } -/* compat with older versions */ -__rte_deprecated -static inline void -__rte_mbuf_raw_free(struct rte_mbuf *m) -{ - rte_mbuf_raw_free(m); -} - /** * The packet mbuf constructor. 
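/*
 * Illustrative sketch (not from this patch); the header lengths below are
 * examples only. Putting steps 1a-1c of the outer UDP checksum comment
 * together for an IPv4 outer header:
 */
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>

static void
example_request_outer_udp_cksum(struct rte_mbuf *m)
{
	m->outer_l2_len = sizeof(struct ether_hdr);	/* step 1a */
	m->outer_l3_len = sizeof(struct ipv4_hdr);
	m->ol_flags |= PKT_TX_OUTER_UDP_CKSUM |		/* step 1b */
			PKT_TX_OUTER_IPV4;		/* step 1c */
	/*
	 * Step 2 is done once at device configuration time by enabling
	 * DEV_TX_OFFLOAD_OUTER_UDP_CKSUM in the port's Tx offloads.
	 */
}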
* @@ -1658,14 +1714,6 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) return NULL; } -/* deprecated, replaced by rte_pktmbuf_prefree_seg() */ -__rte_deprecated -static inline struct rte_mbuf * -__rte_pktmbuf_prefree_seg(struct rte_mbuf *m) -{ - return rte_pktmbuf_prefree_seg(m); -} - /** * Free a segment of a packet mbuf into its original mempool. * diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c index d7835e28..d6f906b0 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.c +++ b/lib/librte_mbuf/rte_mbuf_ptype.c @@ -19,6 +19,8 @@ const char *rte_get_ptype_l2_name(uint32_t ptype) case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN"; case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ"; case RTE_PTYPE_L2_ETHER_PPPOE: return "L2_ETHER_PPPOE"; + case RTE_PTYPE_L2_ETHER_FCOE: return "L2_ETHER_FCOE"; + case RTE_PTYPE_L2_ETHER_MPLS: return "L2_ETHER_MPLS"; default: return "L2_UNKNOWN"; } } @@ -47,6 +49,7 @@ const char *rte_get_ptype_l4_name(uint32_t ptype) case RTE_PTYPE_L4_SCTP: return "L4_SCTP"; case RTE_PTYPE_L4_ICMP: return "L4_ICMP"; case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG"; + case RTE_PTYPE_L4_IGMP: return "L4_IGMP"; default: return "L4_UNKNOWN"; } } diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index 01acc66e..23bc635f 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -131,6 +131,20 @@ extern "C" { */ #define RTE_PTYPE_L2_ETHER_PPPOE 0x00000008 /** + * FCoE packet type. + * + * Packet format: + * <'ether type'=[0x8906]> + */ +#define RTE_PTYPE_L2_ETHER_FCOE 0x00000009 +/** + * MPLS packet type. + * + * Packet format: + * <'ether type'=[0x8847|0x8848]> + */ +#define RTE_PTYPE_L2_ETHER_MPLS 0x0000000a +/** * Mask of layer 2 packet types. * It is used for outer packet for tunneling cases. */ @@ -287,6 +301,14 @@ extern "C" { */ #define RTE_PTYPE_L4_NONFRAG 0x00000600 /** + * IGMP (Internet Group Management Protocol) packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=2, 'MF'=0, 'frag_offset'=0> + */ +#define RTE_PTYPE_L4_IGMP 0x00000700 +/** * Mask of layer 4 packet types. * It is used for outer packet for tunneling cases. */ diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 03e6b5f7..683b216f 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -99,25 +99,44 @@ static unsigned optimize_object_size(unsigned obj_size) return new_obj_size * RTE_MEMPOOL_ALIGN; } +struct pagesz_walk_arg { + int socket_id; + size_t min; +}; + static int find_min_pagesz(const struct rte_memseg_list *msl, void *arg) { - size_t *min = arg; + struct pagesz_walk_arg *wa = arg; + bool valid; + + /* + * we need to only look at page sizes available for a particular socket + * ID. so, we either need an exact match on socket ID (can match both + * native and external memory), or, if SOCKET_ID_ANY was specified as a + * socket ID argument, we must only look at native memory and ignore any + * page sizes associated with external memory. 
+ */ + valid = msl->socket_id == wa->socket_id; + valid |= wa->socket_id == SOCKET_ID_ANY && msl->external == 0; - if (msl->page_sz < *min) - *min = msl->page_sz; + if (valid && msl->page_sz < wa->min) + wa->min = msl->page_sz; return 0; } static size_t -get_min_page_size(void) +get_min_page_size(int socket_id) { - size_t min_pagesz = SIZE_MAX; + struct pagesz_walk_arg wa; - rte_memseg_list_walk(find_min_pagesz, &min_pagesz); + wa.min = SIZE_MAX; + wa.socket_id = socket_id; - return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz; + rte_memseg_list_walk(find_min_pagesz, &wa); + + return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min; } @@ -409,12 +428,18 @@ rte_mempool_populate_default(struct rte_mempool *mp) rte_iova_t iova; unsigned mz_id, n; int ret; - bool no_contig, try_contig, no_pageshift; + bool no_contig, try_contig, no_pageshift, external; ret = mempool_ops_alloc_once(mp); if (ret != 0) return ret; + /* check if we can retrieve a valid socket ID */ + ret = rte_malloc_heap_socket_is_external(mp->socket_id); + if (ret < 0) + return -EINVAL; + external = ret; + /* mempool must not be populated */ if (mp->nb_mem_chunks != 0) return -EEXIST; @@ -462,15 +487,25 @@ rte_mempool_populate_default(struct rte_mempool *mp) * in one contiguous chunk as well (otherwise we might end up wasting a * 1G page on a 10MB memzone). If we fail to get enough contiguous * memory, then we'll go and reserve space page-by-page. + * + * We also have to take into account the fact that memory that we're + * going to allocate from can belong to an externally allocated memory + * area, in which case the assumption of IOVA as VA mode being + * synonymous with IOVA contiguousness will not hold. We should also try + * to go for contiguous memory even if we're in no-huge mode, because + * external memory may in fact be IOVA-contiguous. 
*/ - no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA; - try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages(); + external = rte_malloc_heap_socket_is_external(mp->socket_id) == 1; + no_pageshift = no_contig || + (!external && rte_eal_iova_mode() == RTE_IOVA_VA); + try_contig = !no_contig && !no_pageshift && + (rte_eal_has_hugepages() || external); if (no_pageshift) { pg_sz = 0; pg_shift = 0; } else if (try_contig) { - pg_sz = get_min_page_size(); + pg_sz = get_min_page_size(mp->socket_id); pg_shift = rte_bsf32(pg_sz); } else { pg_sz = getpagesize(); diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile index 85e403f4..c3082069 100644 --- a/lib/librte_net/Makefile +++ b/lib/librte_net/Makefile @@ -20,6 +20,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h -SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h rte_mpls.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_net/meson.build b/lib/librte_net/meson.build index d3ea1feb..7d66f693 100644 --- a/lib/librte_net/meson.build +++ b/lib/librte_net/meson.build @@ -13,7 +13,8 @@ headers = files('rte_ip.h', 'rte_ether.h', 'rte_gre.h', 'rte_net.h', - 'rte_net_crc.h') + 'rte_net_crc.h', + 'rte_mpls.h') sources = files('rte_arp.c', 'rte_net.c', 'rte_net_crc.c') deps += ['mbuf'] diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.h index da815243..1c7b7a54 100644 --- a/lib/librte_net/net_crc_sse.h +++ b/lib/librte_net/net_crc_sse.h @@ -21,8 +21,8 @@ struct crc_pclmulqdq_ctx { __m128i rk7_rk8; }; -struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16); -struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16); +static struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16); +static struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16); /** * @brief Performs one folding round * diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index bee2b34f..c2c5e249 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -306,6 +306,8 @@ struct vxlan_hdr { #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ #define ETHER_TYPE_LLDP 0x88CC /**< LLDP Protocol. */ +#define ETHER_TYPE_MPLS 0x8847 /**< MPLS ethertype. */ +#define ETHER_TYPE_MPLSM 0x8848 /**< MPLS multicast ethertype. */ #define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) /**< VXLAN tunnel header length. */ diff --git a/lib/librte_net/rte_mpls.h b/lib/librte_net/rte_mpls.h new file mode 100644 index 00000000..11d26ba3 --- /dev/null +++ b/lib/librte_net/rte_mpls.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 6WIND S.A. + */ + +#ifndef _RTE_MPLS_H_ +#define _RTE_MPLS_H_ + +/** + * @file + * + * MPLS-related defines + */ + +#include <stdint.h> +#include <rte_byteorder.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * MPLS header. + */ +struct mpls_hdr { + uint16_t tag_msb; /**< Label(msb). */ +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + uint8_t tag_lsb:4; /**< Label(lsb). */ + uint8_t tc:3; /**< Traffic class. */ + uint8_t bs:1; /**< Bottom of stack. */ +#else + uint8_t bs:1; /**< Bottom of stack. 
*/ + uint8_t tc:3; /**< Traffic class. */ + uint8_t tag_lsb:4; /**< label(lsb) */ +#endif + uint8_t ttl; /**< Time to live. */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_MPLS_H_ */ diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c index 9eb7c743..378a4126 100644 --- a/lib/librte_net/rte_net.c +++ b/lib/librte_net/rte_net.c @@ -13,6 +13,7 @@ #include <rte_udp.h> #include <rte_sctp.h> #include <rte_gre.h> +#include <rte_mpls.h> #include <rte_net.h> /* get l3 packet type from ip6 next protocol */ @@ -274,9 +275,27 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m, off += 2 * sizeof(*vh); hdr_lens->l2_len += 2 * sizeof(*vh); proto = vh->eth_proto; + } else if ((proto == rte_cpu_to_be_16(ETHER_TYPE_MPLS)) || + (proto == rte_cpu_to_be_16(ETHER_TYPE_MPLSM))) { + unsigned int i; + const struct mpls_hdr *mh; + struct mpls_hdr mh_copy; + +#define MAX_MPLS_HDR 5 + for (i = 0; i < MAX_MPLS_HDR; i++) { + mh = rte_pktmbuf_read(m, off + (i * sizeof(*mh)), + sizeof(*mh), &mh_copy); + if (unlikely(mh == NULL)) + return pkt_type; + } + if (i == MAX_MPLS_HDR) + return pkt_type; + pkt_type = RTE_PTYPE_L2_ETHER_MPLS; + hdr_lens->l2_len += (sizeof(*mh) * i); + return pkt_type; } - l3: +l3: if ((layers & RTE_PTYPE_L3_MASK) == 0) return pkt_type; diff --git a/lib/librte_net/rte_net.h b/lib/librte_net/rte_net.h index b6ab6e1d..e59760a0 100644 --- a/lib/librte_net/rte_net.h +++ b/lib/librte_net/rte_net.h @@ -122,14 +122,16 @@ rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) (ol_flags & PKT_TX_OUTER_IPV6)) inner_l3_offset += m->outer_l2_len + m->outer_l3_len; - if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) { - if (ol_flags & PKT_TX_IPV4) { - ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, - inner_l3_offset); + if (ol_flags & PKT_TX_IPV4) { + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + inner_l3_offset); - if (ol_flags & PKT_TX_IP_CKSUM) - ipv4_hdr->hdr_checksum = 0; + if (ol_flags & PKT_TX_IP_CKSUM) + ipv4_hdr->hdr_checksum = 0; + } + if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) { + if (ol_flags & PKT_TX_IPV4) { udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + m->l3_len); udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, @@ -146,12 +148,6 @@ rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) } else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG)) { if (ol_flags & PKT_TX_IPV4) { - ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, - inner_l3_offset); - - if (ol_flags & PKT_TX_IP_CKSUM) - ipv4_hdr->hdr_checksum = 0; - /* non-TSO tcp or TSO */ tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + m->l3_len); diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile index 0ee0fa1a..b241151d 100644 --- a/lib/librte_pdump/Makefile +++ b/lib/librte_pdump/Makefile @@ -8,8 +8,6 @@ LIB = librte_pdump.a CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -CFLAGS += -D_GNU_SOURCE -LDLIBS += -lpthread LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev EXPORT_MAP := rte_pdump_version.map diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile index 84afe98c..cf265503 100644 --- a/lib/librte_pipeline/Makefile +++ b/lib/librte_pipeline/Makefile @@ -12,7 +12,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table -LDLIBS += -lrte_port -lrte_meter -lrte_sched +LDLIBS += -lrte_port -lrte_meter -lrte_sched -lrte_cryptodev EXPORT_MAP := 
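/*
 * Illustrative sketch (not from this patch): the 20-bit MPLS label is
 * split across tag_msb (16 bits, network byte order) and tag_lsb (4 bits),
 * so reassembling it from a parsed header looks like this:
 */
#include <rte_byteorder.h>
#include <rte_mpls.h>

static uint32_t
example_mpls_label(const struct mpls_hdr *mh)
{
	return ((uint32_t)rte_be_to_cpu_16(mh->tag_msb) << 4) | mh->tag_lsb;
}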
rte_pipeline_version.map diff --git a/lib/librte_pipeline/meson.build b/lib/librte_pipeline/meson.build index dc16ab42..04e5f517 100644 --- a/lib/librte_pipeline/meson.build +++ b/lib/librte_pipeline/meson.build @@ -5,4 +5,4 @@ version = 3 allow_experimental_apis = true sources = files('rte_pipeline.c', 'rte_port_in_action.c', 'rte_table_action.c') headers = files('rte_pipeline.h', 'rte_port_in_action.h', 'rte_table_action.h') -deps += ['port', 'table', 'meter', 'sched'] +deps += ['port', 'table', 'meter', 'sched', 'cryptodev'] diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c index 0cb8b804..2c047a8a 100644 --- a/lib/librte_pipeline/rte_pipeline.c +++ b/lib/librte_pipeline/rte_pipeline.c @@ -178,8 +178,7 @@ rte_pipeline_check_params(struct rte_pipeline_params *params) } /* socket */ - if ((params->socket_id < 0) || - (params->socket_id >= RTE_MAX_NUMA_NODES)) { + if (params->socket_id < 0) { RTE_LOG(ERR, PIPELINE, "%s: Incorrect value for parameter socket_id\n", __func__); diff --git a/lib/librte_pipeline/rte_pipeline_version.map b/lib/librte_pipeline/rte_pipeline_version.map index d820b22f..420f065d 100644 --- a/lib/librte_pipeline/rte_pipeline_version.map +++ b/lib/librte_pipeline/rte_pipeline_version.map @@ -72,4 +72,5 @@ EXPERIMENTAL { rte_table_action_stats_read; rte_table_action_time_read; rte_table_action_ttl_read; + rte_table_action_crypto_sym_session_get; }; diff --git a/lib/librte_pipeline/rte_table_action.c b/lib/librte_pipeline/rte_table_action.c index 83ffa5de..537e6593 100644 --- a/lib/librte_pipeline/rte_table_action.c +++ b/lib/librte_pipeline/rte_table_action.c @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2010-2018 Intel Corporation */ - #include <stdlib.h> #include <string.h> @@ -15,6 +14,8 @@ #include <rte_esp.h> #include <rte_tcp.h> #include <rte_udp.h> +#include <rte_cryptodev.h> +#include <rte_cryptodev_pmd.h> #include "rte_table_action.h" @@ -430,6 +431,7 @@ encap_valid(enum rte_table_action_encap_type encap) case RTE_TABLE_ACTION_ENCAP_QINQ: case RTE_TABLE_ACTION_ENCAP_MPLS: case RTE_TABLE_ACTION_ENCAP_PPPOE: + case RTE_TABLE_ACTION_ENCAP_VXLAN: return 1; default: return 0; @@ -498,6 +500,38 @@ struct encap_pppoe_data { struct pppoe_ppp_hdr pppoe_ppp; } __attribute__((__packed__)); +#define IP_PROTO_UDP 17 + +struct encap_vxlan_ipv4_data { + struct ether_hdr ether; + struct ipv4_hdr ipv4; + struct udp_hdr udp; + struct vxlan_hdr vxlan; +} __attribute__((__packed__)); + +struct encap_vxlan_ipv4_vlan_data { + struct ether_hdr ether; + struct vlan_hdr vlan; + struct ipv4_hdr ipv4; + struct udp_hdr udp; + struct vxlan_hdr vxlan; +} __attribute__((__packed__)); + +struct encap_vxlan_ipv6_data { + struct ether_hdr ether; + struct ipv6_hdr ipv6; + struct udp_hdr udp; + struct vxlan_hdr vxlan; +} __attribute__((__packed__)); + +struct encap_vxlan_ipv6_vlan_data { + struct ether_hdr ether; + struct vlan_hdr vlan; + struct ipv6_hdr ipv6; + struct udp_hdr udp; + struct vxlan_hdr vxlan; +} __attribute__((__packed__)); + static size_t encap_data_size(struct rte_table_action_encap_config *encap) { @@ -517,6 +551,18 @@ encap_data_size(struct rte_table_action_encap_config *encap) case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE: return sizeof(struct encap_pppoe_data); + case 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN: + if (encap->vxlan.ip_version) + if (encap->vxlan.vlan) + return sizeof(struct encap_vxlan_ipv4_vlan_data); + else + return sizeof(struct encap_vxlan_ipv4_data); + else + if (encap->vxlan.vlan) + return sizeof(struct 
encap_vxlan_ipv6_vlan_data); + else + return sizeof(struct encap_vxlan_ipv6_data); + default: return 0; } @@ -550,6 +596,9 @@ encap_apply_check(struct rte_table_action_encap_params *p, case RTE_TABLE_ACTION_ENCAP_PPPOE: return 0; + case RTE_TABLE_ACTION_ENCAP_VXLAN: + return 0; + default: return -EINVAL; } @@ -679,6 +728,168 @@ encap_pppoe_apply(void *data, } static int +encap_vxlan_apply(void *data, + struct rte_table_action_encap_params *p, + struct rte_table_action_encap_config *cfg) +{ + if ((p->vxlan.vxlan.vni > 0xFFFFFF) || + (cfg->vxlan.ip_version && (p->vxlan.ipv4.dscp > 0x3F)) || + (!cfg->vxlan.ip_version && (p->vxlan.ipv6.flow_label > 0xFFFFF)) || + (!cfg->vxlan.ip_version && (p->vxlan.ipv6.dscp > 0x3F)) || + (cfg->vxlan.vlan && (p->vxlan.vlan.vid > 0xFFF))) + return -1; + + if (cfg->vxlan.ip_version) + if (cfg->vxlan.vlan) { + struct encap_vxlan_ipv4_vlan_data *d = data; + + /* Ethernet */ + ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN); + + /* VLAN */ + d->vlan.vlan_tci = rte_htons(VLAN(p->vxlan.vlan.pcp, + p->vxlan.vlan.dei, + p->vxlan.vlan.vid)); + d->vlan.eth_proto = rte_htons(ETHER_TYPE_IPv4); + + /* IPv4*/ + d->ipv4.version_ihl = 0x45; + d->ipv4.type_of_service = p->vxlan.ipv4.dscp << 2; + d->ipv4.total_length = 0; /* not pre-computed */ + d->ipv4.packet_id = 0; + d->ipv4.fragment_offset = 0; + d->ipv4.time_to_live = p->vxlan.ipv4.ttl; + d->ipv4.next_proto_id = IP_PROTO_UDP; + d->ipv4.hdr_checksum = 0; + d->ipv4.src_addr = rte_htonl(p->vxlan.ipv4.sa); + d->ipv4.dst_addr = rte_htonl(p->vxlan.ipv4.da); + + d->ipv4.hdr_checksum = rte_ipv4_cksum(&d->ipv4); + + /* UDP */ + d->udp.src_port = rte_htons(p->vxlan.udp.sp); + d->udp.dst_port = rte_htons(p->vxlan.udp.dp); + d->udp.dgram_len = 0; /* not pre-computed */ + d->udp.dgram_cksum = 0; + + /* VXLAN */ + d->vxlan.vx_flags = rte_htonl(0x08000000); + d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8); + + return 0; + } else { + struct encap_vxlan_ipv4_data *d = data; + + /* Ethernet */ + ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_IPv4); + + /* IPv4*/ + d->ipv4.version_ihl = 0x45; + d->ipv4.type_of_service = p->vxlan.ipv4.dscp << 2; + d->ipv4.total_length = 0; /* not pre-computed */ + d->ipv4.packet_id = 0; + d->ipv4.fragment_offset = 0; + d->ipv4.time_to_live = p->vxlan.ipv4.ttl; + d->ipv4.next_proto_id = IP_PROTO_UDP; + d->ipv4.hdr_checksum = 0; + d->ipv4.src_addr = rte_htonl(p->vxlan.ipv4.sa); + d->ipv4.dst_addr = rte_htonl(p->vxlan.ipv4.da); + + d->ipv4.hdr_checksum = rte_ipv4_cksum(&d->ipv4); + + /* UDP */ + d->udp.src_port = rte_htons(p->vxlan.udp.sp); + d->udp.dst_port = rte_htons(p->vxlan.udp.dp); + d->udp.dgram_len = 0; /* not pre-computed */ + d->udp.dgram_cksum = 0; + + /* VXLAN */ + d->vxlan.vx_flags = rte_htonl(0x08000000); + d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8); + + return 0; + } + else + if (cfg->vxlan.vlan) { + struct encap_vxlan_ipv6_vlan_data *d = data; + + /* Ethernet */ + ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN); + + /* VLAN */ + d->vlan.vlan_tci = rte_htons(VLAN(p->vxlan.vlan.pcp, + p->vxlan.vlan.dei, + p->vxlan.vlan.vid)); + d->vlan.eth_proto = rte_htons(ETHER_TYPE_IPv6); + + /* IPv6*/ + d->ipv6.vtc_flow = rte_htonl((6 << 28) | + 
(p->vxlan.ipv6.dscp << 22) | + p->vxlan.ipv6.flow_label); + d->ipv6.payload_len = 0; /* not pre-computed */ + d->ipv6.proto = IP_PROTO_UDP; + d->ipv6.hop_limits = p->vxlan.ipv6.hop_limit; + memcpy(d->ipv6.src_addr, + p->vxlan.ipv6.sa, + sizeof(p->vxlan.ipv6.sa)); + memcpy(d->ipv6.dst_addr, + p->vxlan.ipv6.da, + sizeof(p->vxlan.ipv6.da)); + + /* UDP */ + d->udp.src_port = rte_htons(p->vxlan.udp.sp); + d->udp.dst_port = rte_htons(p->vxlan.udp.dp); + d->udp.dgram_len = 0; /* not pre-computed */ + d->udp.dgram_cksum = 0; + + /* VXLAN */ + d->vxlan.vx_flags = rte_htonl(0x08000000); + d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8); + + return 0; + } else { + struct encap_vxlan_ipv6_data *d = data; + + /* Ethernet */ + ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr); + ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr); + d->ether.ether_type = rte_htons(ETHER_TYPE_IPv6); + + /* IPv6*/ + d->ipv6.vtc_flow = rte_htonl((6 << 28) | + (p->vxlan.ipv6.dscp << 22) | + p->vxlan.ipv6.flow_label); + d->ipv6.payload_len = 0; /* not pre-computed */ + d->ipv6.proto = IP_PROTO_UDP; + d->ipv6.hop_limits = p->vxlan.ipv6.hop_limit; + memcpy(d->ipv6.src_addr, + p->vxlan.ipv6.sa, + sizeof(p->vxlan.ipv6.sa)); + memcpy(d->ipv6.dst_addr, + p->vxlan.ipv6.da, + sizeof(p->vxlan.ipv6.da)); + + /* UDP */ + d->udp.src_port = rte_htons(p->vxlan.udp.sp); + d->udp.dst_port = rte_htons(p->vxlan.udp.dp); + d->udp.dgram_len = 0; /* not pre-computed */ + d->udp.dgram_cksum = 0; + + /* VXLAN */ + d->vxlan.vx_flags = rte_htonl(0x08000000); + d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8); + + return 0; + } +} + +static int encap_apply(void *data, struct rte_table_action_encap_params *p, struct rte_table_action_encap_config *cfg, @@ -707,11 +918,31 @@ encap_apply(void *data, case RTE_TABLE_ACTION_ENCAP_PPPOE: return encap_pppoe_apply(data, p); + case RTE_TABLE_ACTION_ENCAP_VXLAN: + return encap_vxlan_apply(data, p, cfg); + default: return -EINVAL; } } +static __rte_always_inline uint16_t +encap_vxlan_ipv4_checksum_update(uint16_t cksum0, + uint16_t total_length) +{ + int32_t cksum1; + + cksum1 = cksum0; + cksum1 = ~cksum1 & 0xFFFF; + + /* Add total length (one's complement logic) */ + cksum1 += total_length; + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16); + + return (uint16_t)(~cksum1); +} + static __rte_always_inline void * encap(void *dst, const void *src, size_t n) { @@ -720,6 +951,118 @@ encap(void *dst, const void *src, size_t n) } static __rte_always_inline void +pkt_work_encap_vxlan_ipv4(struct rte_mbuf *mbuf, + struct encap_vxlan_ipv4_data *vxlan_tbl, + struct rte_table_action_encap_config *cfg) +{ + uint32_t ether_offset = cfg->vxlan.data_offset; + void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset); + struct encap_vxlan_ipv4_data *vxlan_pkt; + uint16_t ether_length, ipv4_total_length, ipv4_hdr_cksum, udp_length; + + ether_length = (uint16_t)mbuf->pkt_len; + ipv4_total_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr) + + sizeof(struct ipv4_hdr)); + ipv4_hdr_cksum = encap_vxlan_ipv4_checksum_update(vxlan_tbl->ipv4.hdr_checksum, + rte_htons(ipv4_total_length)); + udp_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + + vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl)); + vxlan_pkt->ipv4.total_length = rte_htons(ipv4_total_length); + vxlan_pkt->ipv4.hdr_checksum = ipv4_hdr_cksum; + vxlan_pkt->udp.dgram_len = rte_htons(udp_length); + + mbuf->data_off = ether_offset - (sizeof(struct 
rte_mbuf) + sizeof(*vxlan_pkt)); + mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt); +} + +static __rte_always_inline void +pkt_work_encap_vxlan_ipv4_vlan(struct rte_mbuf *mbuf, + struct encap_vxlan_ipv4_vlan_data *vxlan_tbl, + struct rte_table_action_encap_config *cfg) +{ + uint32_t ether_offset = cfg->vxlan.data_offset; + void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset); + struct encap_vxlan_ipv4_vlan_data *vxlan_pkt; + uint16_t ether_length, ipv4_total_length, ipv4_hdr_cksum, udp_length; + + ether_length = (uint16_t)mbuf->pkt_len; + ipv4_total_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr) + + sizeof(struct ipv4_hdr)); + ipv4_hdr_cksum = encap_vxlan_ipv4_checksum_update(vxlan_tbl->ipv4.hdr_checksum, + rte_htons(ipv4_total_length)); + udp_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + + vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl)); + vxlan_pkt->ipv4.total_length = rte_htons(ipv4_total_length); + vxlan_pkt->ipv4.hdr_checksum = ipv4_hdr_cksum; + vxlan_pkt->udp.dgram_len = rte_htons(udp_length); + + mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt)); + mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt); +} + +static __rte_always_inline void +pkt_work_encap_vxlan_ipv6(struct rte_mbuf *mbuf, + struct encap_vxlan_ipv6_data *vxlan_tbl, + struct rte_table_action_encap_config *cfg) +{ + uint32_t ether_offset = cfg->vxlan.data_offset; + void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset); + struct encap_vxlan_ipv6_data *vxlan_pkt; + uint16_t ether_length, ipv6_payload_length, udp_length; + + ether_length = (uint16_t)mbuf->pkt_len; + ipv6_payload_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + udp_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + + vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl)); + vxlan_pkt->ipv6.payload_len = rte_htons(ipv6_payload_length); + vxlan_pkt->udp.dgram_len = rte_htons(udp_length); + + mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt)); + mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt); +} + +static __rte_always_inline void +pkt_work_encap_vxlan_ipv6_vlan(struct rte_mbuf *mbuf, + struct encap_vxlan_ipv6_vlan_data *vxlan_tbl, + struct rte_table_action_encap_config *cfg) +{ + uint32_t ether_offset = cfg->vxlan.data_offset; + void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset); + struct encap_vxlan_ipv6_vlan_data *vxlan_pkt; + uint16_t ether_length, ipv6_payload_length, udp_length; + + ether_length = (uint16_t)mbuf->pkt_len; + ipv6_payload_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + udp_length = ether_length + + (sizeof(struct vxlan_hdr) + + sizeof(struct udp_hdr)); + + vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl)); + vxlan_pkt->ipv6.payload_len = rte_htons(ipv6_payload_length); + vxlan_pkt->udp.dgram_len = rte_htons(udp_length); + + mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt)); + mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt); +} + +static __rte_always_inline void pkt_work_encap(struct rte_mbuf *mbuf, void *data, struct rte_table_action_encap_config *cfg, @@ -776,6 +1119,20 @@ pkt_work_encap(struct rte_mbuf *mbuf, break; } + case 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN: + { + if (cfg->vxlan.ip_version) + if (cfg->vxlan.vlan) + pkt_work_encap_vxlan_ipv4_vlan(mbuf, data, cfg); + 
else + pkt_work_encap_vxlan_ipv4(mbuf, data, cfg); + else + if (cfg->vxlan.vlan) + pkt_work_encap_vxlan_ipv6_vlan(mbuf, data, cfg); + else + pkt_work_encap_vxlan_ipv6(mbuf, data, cfg); + } + default: break; } @@ -1219,6 +1576,562 @@ pkt_work_time(struct time_data *data, data->time = time; } + +/** + * RTE_TABLE_ACTION_CRYPTO + */ + +#define CRYPTO_OP_MASK_CIPHER 0x1 +#define CRYPTO_OP_MASK_AUTH 0x2 +#define CRYPTO_OP_MASK_AEAD 0x4 + +struct crypto_op_sym_iv_aad { + struct rte_crypto_op op; + struct rte_crypto_sym_op sym_op; + union { + struct { + uint8_t cipher_iv[ + RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX]; + uint8_t auth_iv[ + RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX]; + } cipher_auth; + + struct { + uint8_t iv[RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX]; + uint8_t aad[RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX]; + } aead_iv_aad; + + } iv_aad; +}; + +struct sym_crypto_data { + + union { + struct { + + /** Length of cipher iv. */ + uint16_t cipher_iv_len; + + /** Offset from start of IP header to the cipher iv. */ + uint16_t cipher_iv_data_offset; + + /** Length of cipher iv to be updated in the mbuf. */ + uint16_t cipher_iv_update_len; + + /** Offset from start of IP header to the auth iv. */ + uint16_t auth_iv_data_offset; + + /** Length of auth iv in the mbuf. */ + uint16_t auth_iv_len; + + /** Length of auth iv to be updated in the mbuf. */ + uint16_t auth_iv_update_len; + + } cipher_auth; + struct { + + /** Length of iv. */ + uint16_t iv_len; + + /** Offset from start of IP header to the aead iv. */ + uint16_t iv_data_offset; + + /** Length of iv to be updated in the mbuf. */ + uint16_t iv_update_len; + + /** Length of aad */ + uint16_t aad_len; + + /** Offset from start of IP header to the aad. */ + uint16_t aad_data_offset; + + /** Length of aad to updated in the mbuf. */ + uint16_t aad_update_len; + + } aead; + }; + + /** Offset from start of IP header to the data. */ + uint16_t data_offset; + + /** Digest length. */ + uint16_t digest_len; + + /** block size */ + uint16_t block_size; + + /** Mask of crypto operation */ + uint16_t op_mask; + + /** Session pointer. */ + struct rte_cryptodev_sym_session *session; + + /** Direction of crypto, encrypt or decrypt */ + uint16_t direction; + + /** Private data size to store cipher iv / aad. 
*/ + uint8_t iv_aad_data[32]; + +} __attribute__((__packed__)); + +static int +sym_crypto_cfg_check(struct rte_table_action_sym_crypto_config *cfg) +{ + if (!rte_cryptodev_pmd_is_valid_dev(cfg->cryptodev_id)) + return -EINVAL; + if (cfg->mp_create == NULL || cfg->mp_init == NULL) + return -EINVAL; + + return 0; +} + +static int +get_block_size(const struct rte_crypto_sym_xform *xform, uint8_t cdev_id) +{ + struct rte_cryptodev_info dev_info; + const struct rte_cryptodev_capabilities *cap; + uint32_t i; + + rte_cryptodev_info_get(cdev_id, &dev_info); + + for (i = 0;; i++) { + cap = &dev_info.capabilities[i]; + if (!cap) + break; + + if (cap->sym.xform_type != xform->type) + continue; + + if ((xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) && + (cap->sym.cipher.algo == xform->cipher.algo)) + return cap->sym.cipher.block_size; + + if ((xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) && + (cap->sym.aead.algo == xform->aead.algo)) + return cap->sym.aead.block_size; + + if (xform->type == RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) + break; + } + + return -1; +} + +static int +sym_crypto_apply(struct sym_crypto_data *data, + struct rte_table_action_sym_crypto_config *cfg, + struct rte_table_action_sym_crypto_params *p) +{ + const struct rte_crypto_cipher_xform *cipher_xform = NULL; + const struct rte_crypto_auth_xform *auth_xform = NULL; + const struct rte_crypto_aead_xform *aead_xform = NULL; + struct rte_crypto_sym_xform *xform = p->xform; + struct rte_cryptodev_sym_session *session; + int ret; + + memset(data, 0, sizeof(*data)); + + while (xform) { + if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) { + cipher_xform = &xform->cipher; + + if (cipher_xform->iv.length > + RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX) + return -ENOMEM; + if (cipher_xform->iv.offset != + RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET) + return -EINVAL; + + ret = get_block_size(xform, cfg->cryptodev_id); + if (ret < 0) + return -1; + data->block_size = (uint16_t)ret; + data->op_mask |= CRYPTO_OP_MASK_CIPHER; + + data->cipher_auth.cipher_iv_len = + cipher_xform->iv.length; + data->cipher_auth.cipher_iv_data_offset = (uint16_t) + p->cipher_auth.cipher_iv_update.offset; + data->cipher_auth.cipher_iv_update_len = (uint16_t) + p->cipher_auth.cipher_iv_update.length; + + rte_memcpy(data->iv_aad_data, + p->cipher_auth.cipher_iv.val, + p->cipher_auth.cipher_iv.length); + + data->direction = cipher_xform->op; + + } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) { + auth_xform = &xform->auth; + if (auth_xform->iv.length > + RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX) + return -ENOMEM; + data->op_mask |= CRYPTO_OP_MASK_AUTH; + + data->cipher_auth.auth_iv_len = auth_xform->iv.length; + data->cipher_auth.auth_iv_data_offset = (uint16_t) + p->cipher_auth.auth_iv_update.offset; + data->cipher_auth.auth_iv_update_len = (uint16_t) + p->cipher_auth.auth_iv_update.length; + data->digest_len = auth_xform->digest_length; + + data->direction = (auth_xform->op == + RTE_CRYPTO_AUTH_OP_GENERATE) ? 
+ RTE_CRYPTO_CIPHER_OP_ENCRYPT : + RTE_CRYPTO_CIPHER_OP_DECRYPT; + + } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) { + aead_xform = &xform->aead; + + if ((aead_xform->iv.length > + RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX) || ( + aead_xform->aad_length > + RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX)) + return -EINVAL; + if (aead_xform->iv.offset != + RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET) + return -EINVAL; + + ret = get_block_size(xform, cfg->cryptodev_id); + if (ret < 0) + return -1; + data->block_size = (uint16_t)ret; + data->op_mask |= CRYPTO_OP_MASK_AEAD; + + data->digest_len = aead_xform->digest_length; + data->aead.iv_len = aead_xform->iv.length; + data->aead.aad_len = aead_xform->aad_length; + + data->aead.iv_data_offset = (uint16_t) + p->aead.iv_update.offset; + data->aead.iv_update_len = (uint16_t) + p->aead.iv_update.length; + data->aead.aad_data_offset = (uint16_t) + p->aead.aad_update.offset; + data->aead.aad_update_len = (uint16_t) + p->aead.aad_update.length; + + rte_memcpy(data->iv_aad_data, + p->aead.iv.val, + p->aead.iv.length); + + rte_memcpy(data->iv_aad_data + p->aead.iv.length, + p->aead.aad.val, + p->aead.aad.length); + + data->direction = (aead_xform->op == + RTE_CRYPTO_AEAD_OP_ENCRYPT) ? + RTE_CRYPTO_CIPHER_OP_ENCRYPT : + RTE_CRYPTO_CIPHER_OP_DECRYPT; + } else + return -EINVAL; + + xform = xform->next; + } + + if (auth_xform && auth_xform->iv.length) { + if (cipher_xform) { + if (auth_xform->iv.offset != + RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET + + cipher_xform->iv.length) + return -EINVAL; + + rte_memcpy(data->iv_aad_data + cipher_xform->iv.length, + p->cipher_auth.auth_iv.val, + p->cipher_auth.auth_iv.length); + } else { + rte_memcpy(data->iv_aad_data, + p->cipher_auth.auth_iv.val, + p->cipher_auth.auth_iv.length); + } + } + + session = rte_cryptodev_sym_session_create(cfg->mp_create); + if (!session) + return -ENOMEM; + + ret = rte_cryptodev_sym_session_init(cfg->cryptodev_id, session, + p->xform, cfg->mp_init); + if (ret < 0) { + rte_cryptodev_sym_session_free(session); + return ret; + } + + data->data_offset = (uint16_t)p->data_offset; + data->session = session; + + return 0; +} + +static __rte_always_inline uint64_t +pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct sym_crypto_data *data, + struct rte_table_action_sym_crypto_config *cfg, + uint16_t ip_offset) +{ + struct crypto_op_sym_iv_aad *crypto_op = (struct crypto_op_sym_iv_aad *) + RTE_MBUF_METADATA_UINT8_PTR(mbuf, cfg->op_offset); + struct rte_crypto_op *op = &crypto_op->op; + struct rte_crypto_sym_op *sym = op->sym; + uint32_t pkt_offset = sizeof(*mbuf) + mbuf->data_off; + uint32_t payload_len = pkt_offset + mbuf->data_len - data->data_offset; + + op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + op->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf); + op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + sym->m_src = mbuf; + sym->m_dst = NULL; + sym->session = data->session; + + /** pad the packet */ + if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { + uint32_t append_len = RTE_ALIGN_CEIL(payload_len, + data->block_size) - payload_len; + + if (unlikely(rte_pktmbuf_append(mbuf, append_len + + data->digest_len) == NULL)) + return 1; + + payload_len += append_len; + } else + payload_len -= data->digest_len; + + if (data->op_mask & CRYPTO_OP_MASK_CIPHER) { + /** prepare cipher op */ + uint8_t *iv = crypto_op->iv_aad.cipher_auth.cipher_iv; + + sym->cipher.data.length = payload_len; + sym->cipher.data.offset = data->data_offset - pkt_offset; + + if 
(data->cipher_auth.cipher_iv_update_len) { + uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf, + data->cipher_auth.cipher_iv_data_offset + + ip_offset); + + /** For encryption, update the pkt iv field, otherwise + * update the iv_aad_field + **/ + if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) + rte_memcpy(pkt_iv, data->iv_aad_data, + data->cipher_auth.cipher_iv_update_len); + else + rte_memcpy(data->iv_aad_data, pkt_iv, + data->cipher_auth.cipher_iv_update_len); + } + + /** write iv */ + rte_memcpy(iv, data->iv_aad_data, + data->cipher_auth.cipher_iv_len); + } + + if (data->op_mask & CRYPTO_OP_MASK_AUTH) { + /** authentication always start from IP header. */ + sym->auth.data.offset = ip_offset - pkt_offset; + sym->auth.data.length = mbuf->data_len - sym->auth.data.offset - + data->digest_len; + sym->auth.digest.data = rte_pktmbuf_mtod_offset(mbuf, + uint8_t *, rte_pktmbuf_pkt_len(mbuf) - + data->digest_len); + sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(mbuf, + rte_pktmbuf_pkt_len(mbuf) - data->digest_len); + + if (data->cipher_auth.auth_iv_update_len) { + uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf, + data->cipher_auth.auth_iv_data_offset + + ip_offset); + uint8_t *data_iv = data->iv_aad_data + + data->cipher_auth.cipher_iv_len; + + if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) + rte_memcpy(pkt_iv, data_iv, + data->cipher_auth.auth_iv_update_len); + else + rte_memcpy(data_iv, pkt_iv, + data->cipher_auth.auth_iv_update_len); + } + + if (data->cipher_auth.auth_iv_len) { + /** prepare cipher op */ + uint8_t *iv = crypto_op->iv_aad.cipher_auth.auth_iv; + + rte_memcpy(iv, data->iv_aad_data + + data->cipher_auth.cipher_iv_len, + data->cipher_auth.auth_iv_len); + } + } + + if (data->op_mask & CRYPTO_OP_MASK_AEAD) { + uint8_t *iv = crypto_op->iv_aad.aead_iv_aad.iv; + uint8_t *aad = crypto_op->iv_aad.aead_iv_aad.aad; + + sym->aead.aad.data = aad; + sym->aead.aad.phys_addr = rte_pktmbuf_iova_offset(mbuf, + aad - rte_pktmbuf_mtod(mbuf, uint8_t *)); + sym->aead.digest.data = rte_pktmbuf_mtod_offset(mbuf, + uint8_t *, rte_pktmbuf_pkt_len(mbuf) - + data->digest_len); + sym->aead.digest.phys_addr = rte_pktmbuf_iova_offset(mbuf, + rte_pktmbuf_pkt_len(mbuf) - data->digest_len); + sym->aead.data.offset = data->data_offset - pkt_offset; + sym->aead.data.length = payload_len; + + if (data->aead.iv_update_len) { + uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf, + data->aead.iv_data_offset + ip_offset); + uint8_t *data_iv = data->iv_aad_data; + + if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) + rte_memcpy(pkt_iv, data_iv, + data->aead.iv_update_len); + else + rte_memcpy(data_iv, pkt_iv, + data->aead.iv_update_len); + } + + rte_memcpy(iv, data->iv_aad_data, data->aead.iv_len); + + if (data->aead.aad_update_len) { + uint8_t *pkt_aad = RTE_MBUF_METADATA_UINT8_PTR(mbuf, + data->aead.aad_data_offset + ip_offset); + uint8_t *data_aad = data->iv_aad_data + + data->aead.iv_len; + + if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) + rte_memcpy(pkt_aad, data_aad, + data->aead.iv_update_len); + else + rte_memcpy(data_aad, pkt_aad, + data->aead.iv_update_len); + } + + rte_memcpy(aad, data->iv_aad_data + data->aead.iv_len, + data->aead.aad_len); + } + + return 0; +} + +/** + * RTE_TABLE_ACTION_TAG + */ +struct tag_data { + uint32_t tag; +} __attribute__((__packed__)); + +static int +tag_apply(struct tag_data *data, + struct rte_table_action_tag_params *p) +{ + data->tag = p->tag; + return 0; +} + +static __rte_always_inline void +pkt_work_tag(struct rte_mbuf *mbuf, + struct 
tag_data *data) +{ + mbuf->hash.fdir.hi = data->tag; + mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; +} + +static __rte_always_inline void +pkt4_work_tag(struct rte_mbuf *mbuf0, + struct rte_mbuf *mbuf1, + struct rte_mbuf *mbuf2, + struct rte_mbuf *mbuf3, + struct tag_data *data0, + struct tag_data *data1, + struct tag_data *data2, + struct tag_data *data3) +{ + mbuf0->hash.fdir.hi = data0->tag; + mbuf1->hash.fdir.hi = data1->tag; + mbuf2->hash.fdir.hi = data2->tag; + mbuf3->hash.fdir.hi = data3->tag; + + mbuf0->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + mbuf1->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + mbuf2->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + mbuf3->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; +} + +/** + * RTE_TABLE_ACTION_DECAP + */ +struct decap_data { + uint16_t n; +} __attribute__((__packed__)); + +static int +decap_apply(struct decap_data *data, + struct rte_table_action_decap_params *p) +{ + data->n = p->n; + return 0; +} + +static __rte_always_inline void +pkt_work_decap(struct rte_mbuf *mbuf, + struct decap_data *data) +{ + uint16_t data_off = mbuf->data_off; + uint16_t data_len = mbuf->data_len; + uint32_t pkt_len = mbuf->pkt_len; + uint16_t n = data->n; + + mbuf->data_off = data_off + n; + mbuf->data_len = data_len - n; + mbuf->pkt_len = pkt_len - n; +} + +static __rte_always_inline void +pkt4_work_decap(struct rte_mbuf *mbuf0, + struct rte_mbuf *mbuf1, + struct rte_mbuf *mbuf2, + struct rte_mbuf *mbuf3, + struct decap_data *data0, + struct decap_data *data1, + struct decap_data *data2, + struct decap_data *data3) +{ + uint16_t data_off0 = mbuf0->data_off; + uint16_t data_len0 = mbuf0->data_len; + uint32_t pkt_len0 = mbuf0->pkt_len; + + uint16_t data_off1 = mbuf1->data_off; + uint16_t data_len1 = mbuf1->data_len; + uint32_t pkt_len1 = mbuf1->pkt_len; + + uint16_t data_off2 = mbuf2->data_off; + uint16_t data_len2 = mbuf2->data_len; + uint32_t pkt_len2 = mbuf2->pkt_len; + + uint16_t data_off3 = mbuf3->data_off; + uint16_t data_len3 = mbuf3->data_len; + uint32_t pkt_len3 = mbuf3->pkt_len; + + uint16_t n0 = data0->n; + uint16_t n1 = data1->n; + uint16_t n2 = data2->n; + uint16_t n3 = data3->n; + + mbuf0->data_off = data_off0 + n0; + mbuf0->data_len = data_len0 - n0; + mbuf0->pkt_len = pkt_len0 - n0; + + mbuf1->data_off = data_off1 + n1; + mbuf1->data_len = data_len1 - n1; + mbuf1->pkt_len = pkt_len1 - n1; + + mbuf2->data_off = data_off2 + n2; + mbuf2->data_len = data_len2 - n2; + mbuf2->pkt_len = pkt_len2 - n2; + + mbuf3->data_off = data_off3 + n3; + mbuf3->data_len = data_len3 - n3; + mbuf3->pkt_len = pkt_len3 - n3; +} + /** * Action profile */ @@ -1235,6 +2148,9 @@ action_valid(enum rte_table_action_type action) case RTE_TABLE_ACTION_TTL: case RTE_TABLE_ACTION_STATS: case RTE_TABLE_ACTION_TIME: + case RTE_TABLE_ACTION_SYM_CRYPTO: + case RTE_TABLE_ACTION_TAG: + case RTE_TABLE_ACTION_DECAP: return 1; default: return 0; @@ -1254,6 +2170,7 @@ struct ap_config { struct rte_table_action_nat_config nat; struct rte_table_action_ttl_config ttl; struct rte_table_action_stats_config stats; + struct rte_table_action_sym_crypto_config sym_crypto; }; static size_t @@ -1274,6 +2191,8 @@ action_cfg_size(enum rte_table_action_type action) return sizeof(struct rte_table_action_ttl_config); case RTE_TABLE_ACTION_STATS: return sizeof(struct rte_table_action_stats_config); + case RTE_TABLE_ACTION_SYM_CRYPTO: + return sizeof(struct rte_table_action_sym_crypto_config); default: return 0; } @@ -1305,6 +2224,8 @@ action_cfg_get(struct ap_config *ap_config, case RTE_TABLE_ACTION_STATS: return 
&ap_config->stats; + case RTE_TABLE_ACTION_SYM_CRYPTO: + return &ap_config->sym_crypto; default: return NULL; } @@ -1361,6 +2282,15 @@ action_data_size(enum rte_table_action_type action, case RTE_TABLE_ACTION_TIME: return sizeof(struct time_data); + case RTE_TABLE_ACTION_SYM_CRYPTO: + return (sizeof(struct sym_crypto_data)); + + case RTE_TABLE_ACTION_TAG: + return sizeof(struct tag_data); + + case RTE_TABLE_ACTION_DECAP: + return sizeof(struct decap_data); + default: return 0; } @@ -1460,6 +2390,10 @@ rte_table_action_profile_action_register(struct rte_table_action_profile *profil status = stats_cfg_check(action_config); break; + case RTE_TABLE_ACTION_SYM_CRYPTO: + status = sym_crypto_cfg_check(action_config); + break; + default: status = 0; break; @@ -1609,6 +2543,19 @@ rte_table_action_apply(struct rte_table_action *action, return time_apply(action_data, action_params); + case RTE_TABLE_ACTION_SYM_CRYPTO: + return sym_crypto_apply(action_data, + &action->cfg.sym_crypto, + action_params); + + case RTE_TABLE_ACTION_TAG: + return tag_apply(action_data, + action_params); + + case RTE_TABLE_ACTION_DECAP: + return decap_apply(action_data, + action_params); + default: return -EINVAL; } @@ -1861,6 +2808,25 @@ rte_table_action_time_read(struct rte_table_action *action, return 0; } +struct rte_cryptodev_sym_session * +rte_table_action_crypto_sym_session_get(struct rte_table_action *action, + void *data) +{ + struct sym_crypto_data *sym_crypto_data; + + /* Check input arguments */ + if ((action == NULL) || + ((action->cfg.action_mask & + (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) == 0) || + (data == NULL)) + return NULL; + + sym_crypto_data = action_data_get(data, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + + return sym_crypto_data->session; +} + static __rte_always_inline uint64_t pkt_work(struct rte_mbuf *mbuf, struct rte_pipeline_table_entry *table_entry, @@ -1920,6 +2886,14 @@ pkt_work(struct rte_mbuf *mbuf, dscp); } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) { + void *data = action_data_get(table_entry, + action, + RTE_TABLE_ACTION_DECAP); + + pkt_work_decap(mbuf, data); + } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) { void *data = action_data_get(table_entry, action, RTE_TABLE_ACTION_ENCAP); @@ -1966,6 +2940,22 @@ pkt_work(struct rte_mbuf *mbuf, pkt_work_time(data, time); } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) { + void *data = action_data_get(table_entry, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + + drop_mask |= pkt_work_sym_crypto(mbuf, data, &cfg->sym_crypto, + ip_offset); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) { + void *data = action_data_get(table_entry, + action, + RTE_TABLE_ACTION_TAG); + + pkt_work_tag(mbuf, data); + } + return drop_mask; } @@ -2137,6 +3127,24 @@ pkt4_work(struct rte_mbuf **mbufs, dscp3); } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) { + void *data0 = action_data_get(table_entry0, + action, + RTE_TABLE_ACTION_DECAP); + void *data1 = action_data_get(table_entry1, + action, + RTE_TABLE_ACTION_DECAP); + void *data2 = action_data_get(table_entry2, + action, + RTE_TABLE_ACTION_DECAP); + void *data3 = action_data_get(table_entry3, + action, + RTE_TABLE_ACTION_DECAP); + + pkt4_work_decap(mbuf0, mbuf1, mbuf2, mbuf3, + data0, data1, data2, data3); + } + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) { void *data0 = action_data_get(table_entry0, action, RTE_TABLE_ACTION_ENCAP); @@ -2254,6 +3262,44 @@ pkt4_work(struct rte_mbuf **mbufs, pkt_work_time(data3, time); } + if 
(cfg->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) { + void *data0 = action_data_get(table_entry0, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + void *data1 = action_data_get(table_entry1, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + void *data2 = action_data_get(table_entry2, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + void *data3 = action_data_get(table_entry3, action, + RTE_TABLE_ACTION_SYM_CRYPTO); + + drop_mask0 |= pkt_work_sym_crypto(mbuf0, data0, &cfg->sym_crypto, + ip_offset); + drop_mask1 |= pkt_work_sym_crypto(mbuf1, data1, &cfg->sym_crypto, + ip_offset); + drop_mask2 |= pkt_work_sym_crypto(mbuf2, data2, &cfg->sym_crypto, + ip_offset); + drop_mask3 |= pkt_work_sym_crypto(mbuf3, data3, &cfg->sym_crypto, + ip_offset); + } + + if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) { + void *data0 = action_data_get(table_entry0, + action, + RTE_TABLE_ACTION_TAG); + void *data1 = action_data_get(table_entry1, + action, + RTE_TABLE_ACTION_TAG); + void *data2 = action_data_get(table_entry2, + action, + RTE_TABLE_ACTION_TAG); + void *data3 = action_data_get(table_entry3, + action, + RTE_TABLE_ACTION_TAG); + + pkt4_work_tag(mbuf0, mbuf1, mbuf2, mbuf3, + data0, data1, data2, data3); + } + return drop_mask0 | (drop_mask1 << 1) | (drop_mask2 << 2) | diff --git a/lib/librte_pipeline/rte_table_action.h b/lib/librte_pipeline/rte_table_action.h index c7f751aa..c9606129 100644 --- a/lib/librte_pipeline/rte_table_action.h +++ b/lib/librte_pipeline/rte_table_action.h @@ -93,6 +93,15 @@ enum rte_table_action_type { /** Timestamp. */ RTE_TABLE_ACTION_TIME, + + /** Crypto. */ + RTE_TABLE_ACTION_SYM_CRYPTO, + + /** Tag. */ + RTE_TABLE_ACTION_TAG, + + /** Packet decapsulations. */ + RTE_TABLE_ACTION_DECAP, }; /** Common action configuration (per table action profile). */ @@ -366,6 +375,11 @@ enum rte_table_action_encap_type { /** IP -> { Ether | PPPoE | PPP | IP } */ RTE_TABLE_ACTION_ENCAP_PPPOE, + + /** Ether -> { Ether | IP | UDP | VXLAN | Ether } + * Ether -> { Ether | VLAN | IP | UDP | VXLAN | Ether } + */ + RTE_TABLE_ACTION_ENCAP_VXLAN, }; /** Pre-computed Ethernet header fields for encapsulation action. */ @@ -393,6 +407,34 @@ struct rte_table_action_pppoe_hdr { uint16_t session_id; /**< Session ID. */ }; +/** Pre-computed IPv4 header fields for encapsulation action. */ +struct rte_table_action_ipv4_header { + uint32_t sa; /**< Source address. */ + uint32_t da; /**< Destination address. */ + uint8_t dscp; /**< DiffServ Code Point (DSCP). */ + uint8_t ttl; /**< Time To Live (TTL). */ +}; + +/** Pre-computed IPv6 header fields for encapsulation action. */ +struct rte_table_action_ipv6_header { + uint8_t sa[16]; /**< Source address. */ + uint8_t da[16]; /**< Destination address. */ + uint32_t flow_label; /**< Flow label. */ + uint8_t dscp; /**< DiffServ Code Point (DSCP). */ + uint8_t hop_limit; /**< Hop Limit (HL). */ +}; + +/** Pre-computed UDP header fields for encapsulation action. */ +struct rte_table_action_udp_header { + uint16_t sp; /**< Source port. */ + uint16_t dp; /**< Destination port. */ +}; + +/** Pre-computed VXLAN header fields for encapsulation action. */ +struct rte_table_action_vxlan_hdr { + uint32_t vni; /**< VXLAN Network Identifier (VNI). */ +}; + /** Ether encap parameters. */ struct rte_table_action_encap_ether_params { struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ @@ -437,6 +479,21 @@ struct rte_table_action_encap_pppoe_params { struct rte_table_action_pppoe_hdr pppoe; /**< PPPoE/PPP headers. */ }; +/** VXLAN encap parameters. 
*/ +struct rte_table_action_encap_vxlan_params { + struct rte_table_action_ether_hdr ether; /**< Ethernet header. */ + struct rte_table_action_vlan_hdr vlan; /**< VLAN header. */ + + RTE_STD_C11 + union { + struct rte_table_action_ipv4_header ipv4; /**< IPv4 header. */ + struct rte_table_action_ipv6_header ipv6; /**< IPv6 header. */ + }; + + struct rte_table_action_udp_header udp; /**< UDP header. */ + struct rte_table_action_vxlan_hdr vxlan; /**< VXLAN header. */ +}; + /** Encap action configuration (per table action profile). */ struct rte_table_action_encap_config { /** Bit mask defining the set of packet encapsulations enabled for the @@ -446,6 +503,30 @@ struct rte_table_action_encap_config { * @see enum rte_table_action_encap_type */ uint64_t encap_mask; + + /** Encapsulation type specific configuration. */ + RTE_STD_C11 + union { + struct { + /** Input packet to be encapsulated: offset within the + * input packet buffer to the start of the Ethernet + * frame to be encapsulated. Offset 0 points to the + * first byte of the MBUF structure. + */ + uint32_t data_offset; + + /** Encapsulation header: non-zero when encapsulation + * header includes a VLAN tag, zero otherwise. + */ + int vlan; + + /** Encapsulation header: IP version of the IP header + * within the encapsulation header. Non-zero for IPv4, + * zero for IPv6. + */ + int ip_version; + } vxlan; /**< VXLAN specific configuration. */ + }; }; /** Encap action parameters (per table rule). */ @@ -469,6 +550,9 @@ struct rte_table_action_encap_params { /** Only valid when *type* is set to PPPoE. */ struct rte_table_action_encap_pppoe_params pppoe; + + /** Only valid when *type* is set to VXLAN. */ + struct rte_table_action_encap_vxlan_params vxlan; }; }; @@ -606,6 +690,111 @@ struct rte_table_action_time_params { }; /** + * RTE_TABLE_ACTION_CRYPTO + */ +#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX +#define RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX (16) +#endif + +#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX +#define RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX (16) +#endif + +#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET +#define RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET \ + (sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op)) +#endif + +/** Common action structure to store the data's value, length, and offset */ +struct rte_table_action_vlo { + uint8_t *val; + uint32_t length; + uint32_t offset; +}; + +/** Symmetric crypto action configuration (per table action profile). */ +struct rte_table_action_sym_crypto_config { + /** Target Cryptodev ID. */ + uint8_t cryptodev_id; + + /** + * Offset to rte_crypto_op structure within the input packet buffer. + * Offset 0 points to the first byte of the MBUF structure. + */ + uint32_t op_offset; + + /** The mempool for creating cryptodev sessions. */ + struct rte_mempool *mp_create; + + /** The mempool for initializing cryptodev sessions. */ + struct rte_mempool *mp_init; +}; + +/** Symmetric Crypto action parameters (per table rule). */ +struct rte_table_action_sym_crypto_params { + + /** Xform pointer contains all relevant information */ + struct rte_crypto_sym_xform *xform; + + /** + * Offset within the input packet buffer to the first byte of data + * to be processed by the crypto unit. Offset 0 points to the first + * byte of the MBUF structure. + */ + uint32_t data_offset; + + union { + struct { + /** Cipher iv data. */ + struct rte_table_action_vlo cipher_iv; + + /** Cipher iv data. */ + struct rte_table_action_vlo cipher_iv_update; + + /** Auth iv data. 
*/ + struct rte_table_action_vlo auth_iv; + + /** Auth iv data. */ + struct rte_table_action_vlo auth_iv_update; + + } cipher_auth; + + struct { + /** AEAD AAD data. */ + struct rte_table_action_vlo aad; + + /** AEAD iv data. */ + struct rte_table_action_vlo iv; + + /** AEAD AAD data. */ + struct rte_table_action_vlo aad_update; + + /** AEAD iv data. */ + struct rte_table_action_vlo iv_update; + + } aead; + }; +}; + +/** + * RTE_TABLE_ACTION_TAG + */ +/** Tag action parameters (per table rule). */ +struct rte_table_action_tag_params { + /** Tag to be attached to the input packet. */ + uint32_t tag; +}; + +/** + * RTE_TABLE_ACTION_DECAP + */ +/** Decap action parameters (per table rule). */ +struct rte_table_action_decap_params { + /** Number of bytes to be removed from the start of the packet. */ + uint16_t n; +}; + +/** * Table action profile. */ struct rte_table_action_profile; @@ -898,6 +1087,20 @@ rte_table_action_time_read(struct rte_table_action *action, void *data, uint64_t *timestamp); +/** + * Table action cryptodev symmetric session get. + * + * @param[in] action + * Handle to table action object (needs to be valid). + * @param[in] data + * Data byte array (typically table rule data) with sym crypto action. + * @return + * The pointer to the session on success, NULL otherwise. + */ +struct rte_cryptodev_sym_session *__rte_experimental +rte_table_action_crypto_sym_session_get(struct rte_table_action *action, + void *data); + #ifdef __cplusplus } #endif diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile index 8df4864e..1b83f6f2 100644 --- a/lib/librte_port/Makefile +++ b/lib/librte_port/Makefile @@ -11,7 +11,7 @@ ifeq ($(CONFIG_RTE_PORT_PCAP),y) LDLIBS += -lpcap endif LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -LDLIBS += -lrte_ip_frag -lrte_sched +LDLIBS += -lrte_ip_frag -lrte_sched -lrte_cryptodev ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) LDLIBS += -lrte_kni endif @@ -38,6 +38,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c endif SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sym_crypto.c # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port.h @@ -53,5 +54,6 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sym_crypto.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_port/meson.build b/lib/librte_port/meson.build index f3d8b443..0d11456f 100644 --- a/lib/librte_port/meson.build +++ b/lib/librte_port/meson.build @@ -9,7 +9,8 @@ sources = files( 'rte_port_ras.c', 'rte_port_ring.c', 'rte_port_sched.c', - 'rte_port_source_sink.c') + 'rte_port_source_sink.c', + 'rte_port_sym_crypto.c') headers = files( 'rte_port_ethdev.h', 'rte_port_fd.h', @@ -18,8 +19,9 @@ headers = files( 'rte_port.h', 'rte_port_ring.h', 'rte_port_sched.h', - 'rte_port_source_sink.h') -deps += ['ethdev', 'sched', 'ip_frag'] + 'rte_port_source_sink.h', + 'rte_port_sym_crypto.h') +deps += ['ethdev', 'sched', 'ip_frag', 'cryptodev'] if dpdk_conf.has('RTE_LIBRTE_KNI') sources += files('rte_port_kni.c') diff --git a/lib/librte_port/rte_port_sym_crypto.c b/lib/librte_port/rte_port_sym_crypto.c new file mode 100644 index 00000000..295984d0 --- /dev/null +++ b/lib/librte_port/rte_port_sym_crypto.c @@ -0,0 +1,552 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ +#include 
<string.h> + +#include <rte_common.h> +#include <rte_malloc.h> + +#include "rte_port_sym_crypto.h" + +/* + * Port Crypto Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(port, val) \ + (port)->stats.n_pkts_in += (val) +#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(port, val) \ + (port)->stats.n_pkts_drop += (val) + +#else + +#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sym_crypto_reader { + struct rte_port_in_stats stats; + + uint8_t cryptodev_id; + uint16_t queue_id; + struct rte_crypto_op *ops[RTE_PORT_IN_BURST_SIZE_MAX]; + rte_port_sym_crypto_reader_callback_fn f_callback; + void *arg_callback; +}; + +static void * +rte_port_sym_crypto_reader_create(void *params, int socket_id) +{ + struct rte_port_sym_crypto_reader_params *conf = + params; + struct rte_port_sym_crypto_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->cryptodev_id = conf->cryptodev_id; + port->queue_id = conf->queue_id; + port->f_callback = conf->f_callback; + port->arg_callback = conf->arg_callback; + + return port; +} + +static int +rte_port_sym_crypto_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_sym_crypto_reader *p = + port; + uint16_t rx_ops_cnt, i, n = 0; + + rx_ops_cnt = rte_cryptodev_dequeue_burst(p->cryptodev_id, p->queue_id, + p->ops, n_pkts); + + for (i = 0; i < rx_ops_cnt; i++) { + struct rte_crypto_op *op = p->ops[i]; + + /** Drop failed pkts */ + if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) { + rte_pktmbuf_free(op->sym->m_src); + continue; + } + + pkts[n++] = op->sym->m_src; + } + + if (p->f_callback) + (*p->f_callback)(pkts, n, p->arg_callback); + + RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(p, n); + RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(p, rx_ops_cnt - n); + + return n; +} + +static int +rte_port_sym_crypto_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_sym_crypto_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_sym_crypto_reader *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port crypto Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(port, val) \ + (port)->stats.n_pkts_in += (val) +#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + (port)->stats.n_pkts_drop += (val) + +#else + +#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sym_crypto_writer { + struct rte_port_out_stats stats; + + struct rte_crypto_op *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + + uint8_t cryptodev_id; + uint16_t queue_id; + uint16_t crypto_op_offset; +}; + +static void * +rte_port_sym_crypto_writer_create(void 
*params, int socket_id) +{ + struct rte_port_sym_crypto_writer_params *conf = + params; + struct rte_port_sym_crypto_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + port->cryptodev_id = conf->cryptodev_id; + port->queue_id = conf->queue_id; + port->crypto_op_offset = conf->crypto_op_offset; + + return port; +} + +static inline void +send_burst(struct rte_port_sym_crypto_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_cryptodev_enqueue_burst(p->cryptodev_id, p->queue_id, + p->tx_buf, p->tx_buf_count); + + RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - + nb_tx); + for (; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]->sym->m_src); + + p->tx_buf_count = 0; +} + +static int +rte_port_sym_crypto_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_sym_crypto_writer *p = + port; + + p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, p->crypto_op_offset); + RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_sym_crypto_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_sym_crypto_writer *p = + port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + + for (i = 0; i < n_pkts; i++) + p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[i], + p->crypto_op_offset); + + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } else { + for (; pkts_mask;) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, + p->crypto_op_offset); + + RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + + return 0; +} + +static int +rte_port_sym_crypto_writer_flush(void *port) +{ + struct rte_port_sym_crypto_writer *p = + port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_sym_crypto_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_sym_crypto_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_sym_crypto_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_sym_crypto_writer *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, 
sizeof(p->stats)); + + return 0; +} + +/* + * Port crypto Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + (port)->stats.n_pkts_in += (val) +#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + (port)->stats.n_pkts_drop += (val) + +#else + +#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sym_crypto_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_crypto_op *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint64_t n_retries; + + uint8_t cryptodev_id; + uint16_t queue_id; + uint16_t crypto_op_offset; +}; + +static void * +rte_port_sym_crypto_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_sym_crypto_writer_nodrop_params *conf = + params; + struct rte_port_sym_crypto_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->cryptodev_id = conf->cryptodev_id; + port->queue_id = conf->queue_id; + port->crypto_op_offset = conf->crypto_op_offset; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? 
UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_sym_crypto_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_cryptodev_enqueue_burst(p->cryptodev_id, p->queue_id, + p->tx_buf, p->tx_buf_count); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_cryptodev_enqueue_burst(p->cryptodev_id, + p->queue_id, p->tx_buf + nb_tx, + p->tx_buf_count - nb_tx); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, + p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]->sym->m_src); + + p->tx_buf_count = 0; +} + +static int +rte_port_sym_crypto_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_sym_crypto_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, p->crypto_op_offset); + RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_sym_crypto_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_sym_crypto_writer_nodrop *p = + port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + + for (i = 0; i < n_pkts; i++) + p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkts[i], + p->crypto_op_offset); + + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = (struct rte_crypto_op *) + RTE_MBUF_METADATA_UINT8_PTR(pkt, + p->crypto_op_offset); + RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p, + 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + } + + return 0; +} + +static int +rte_port_sym_crypto_writer_nodrop_flush(void *port) +{ + struct rte_port_sym_crypto_writer_nodrop *p = + port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_sym_crypto_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_sym_crypto_writer_nodrop_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_sym_crypto_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_sym_crypto_writer_nodrop *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_sym_crypto_reader_ops = { + .f_create = rte_port_sym_crypto_reader_create, + .f_free = rte_port_sym_crypto_reader_free, + .f_rx = 
rte_port_sym_crypto_reader_rx, + .f_stats = rte_port_sym_crypto_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_sym_crypto_writer_ops = { + .f_create = rte_port_sym_crypto_writer_create, + .f_free = rte_port_sym_crypto_writer_free, + .f_tx = rte_port_sym_crypto_writer_tx, + .f_tx_bulk = rte_port_sym_crypto_writer_tx_bulk, + .f_flush = rte_port_sym_crypto_writer_flush, + .f_stats = rte_port_sym_crypto_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_sym_crypto_writer_nodrop_ops = { + .f_create = rte_port_sym_crypto_writer_nodrop_create, + .f_free = rte_port_sym_crypto_writer_nodrop_free, + .f_tx = rte_port_sym_crypto_writer_nodrop_tx, + .f_tx_bulk = rte_port_sym_crypto_writer_nodrop_tx_bulk, + .f_flush = rte_port_sym_crypto_writer_nodrop_flush, + .f_stats = rte_port_sym_crypto_writer_nodrop_stats_read, +}; diff --git a/lib/librte_port/rte_port_sym_crypto.h b/lib/librte_port/rte_port_sym_crypto.h new file mode 100644 index 00000000..181f6ce0 --- /dev/null +++ b/lib/librte_port/rte_port_sym_crypto.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__ +#define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port sym crypto Interface + * + * crypto_reader: input port built on top of pre-initialized crypto interface + * crypto_writer: output port built on top of pre-initialized crypto interface + * + **/ + +#include <stdint.h> + +#include <rte_cryptodev.h> + +#include "rte_port.h" + +/** Function prototype for reader post action. */ +typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts, + uint16_t n_pkts, void *arg); + +/** Crypto_reader port parameters */ +struct rte_port_sym_crypto_reader_params { + /** Target cryptodev ID. */ + uint8_t cryptodev_id; + + /** Target cryptodev Queue Pair ID. */ + uint16_t queue_id; + + /** Crypto reader post callback function. */ + rte_port_sym_crypto_reader_callback_fn f_callback; + + /** Crypto reader post callback function arguments. */ + void *arg_callback; +}; + +/** Crypto_reader port operations. */ +extern struct rte_port_in_ops rte_port_sym_crypto_reader_ops; + + +/** Crypto_writer port parameters. */ +struct rte_port_sym_crypto_writer_params { + /** Target cryptodev ID. */ + uint8_t cryptodev_id; + + /** Target cryptodev Queue Pair ID. */ + uint16_t queue_id; + + /** offset to rte_crypto_op in the mbufs. */ + uint16_t crypto_op_offset; + + /** Burst size to crypto interface. */ + uint32_t tx_burst_sz; +}; + +/** Crypto_writer port operations. */ +extern struct rte_port_out_ops rte_port_sym_crypto_writer_ops; + +/** Crypto_writer_nodrop port parameters. */ +struct rte_port_sym_crypto_writer_nodrop_params { + /** Target cryptodev ID. */ + uint8_t cryptodev_id; + + /** Target cryptodev queue pair id. */ + uint16_t queue_id; + + /** Offset to rte_crypto_op in the mbufs. */ + uint16_t crypto_op_offset; + + /** Burst size to crypto interface. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit. */ + uint32_t n_retries; +}; + +/** Crypto_writer_nodrop port operations. 
*/ +extern struct rte_port_out_ops rte_port_sym_crypto_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map index 6470629b..609bcec3 100644 --- a/lib/librte_port/rte_port_version.map +++ b/lib/librte_port/rte_port_version.map @@ -51,3 +51,12 @@ DPDK_16.11 { rte_port_fd_writer_nodrop_ops; } DPDK_16.07; + +DPDK_18.11 { + global: + + rte_port_sym_crypto_reader_ops; + rte_port_sym_crypto_writer_ops; + rte_port_sym_crypto_writer_nodrop_ops; + +} DPDK_16.11; diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile index 6f85e885..9bec668d 100644 --- a/lib/librte_power/Makefile +++ b/lib/librte_power/Makefile @@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk LIB = librte_power.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing -LDLIBS += -lrte_eal +LDLIBS += -lrte_eal -lrte_timer EXPORT_MAP := rte_power_version.map @@ -16,8 +16,9 @@ LIBABIVER := 1 # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_POWER) := rte_power.c power_acpi_cpufreq.c SRCS-$(CONFIG_RTE_LIBRTE_POWER) += power_kvm_vm.c guest_channel.c +SRCS-$(CONFIG_RTE_LIBRTE_POWER) += rte_power_empty_poll.c # install this header file -SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h +SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h rte_power_empty_poll.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h index ee638eef..e7b93a79 100644 --- a/lib/librte_power/channel_commands.h +++ b/lib/librte_power/channel_commands.h @@ -19,6 +19,7 @@ extern "C" { #define CPU_POWER 1 #define CPU_POWER_CONNECT 2 #define PKT_POLICY 3 +#define PKT_POLICY_REMOVE 4 /* CPU Power Command Scaling */ #define CPU_POWER_SCALE_UP 1 @@ -58,6 +59,9 @@ struct traffic { uint32_t max_max_packet_thresh; }; +#define CORE_TYPE_VIRTUAL 0 +#define CORE_TYPE_PHYSICAL 1 + struct channel_packet { uint64_t resource_id; /**< core_num, device */ uint32_t unit; /**< scale down/up/min/max */ @@ -70,6 +74,7 @@ struct channel_packet { uint8_t vcpu_to_control[MAX_VCPU_PER_VM]; uint8_t num_vcpu; struct timer_profile timer_policy; + bool core_type; enum workload workload; enum policy_to_use policy_to_use; struct t_boost_status t_boost_status; diff --git a/lib/librte_power/meson.build b/lib/librte_power/meson.build index 253173f2..9ed8b56d 100644 --- a/lib/librte_power/meson.build +++ b/lib/librte_power/meson.build @@ -5,5 +5,7 @@ if host_machine.system() != 'linux' build = false endif sources = files('rte_power.c', 'power_acpi_cpufreq.c', - 'power_kvm_vm.c', 'guest_channel.c') -headers = files('rte_power.h') + 'power_kvm_vm.c', 'guest_channel.c', + 'rte_power_empty_poll.c') +headers = files('rte_power.h','rte_power_empty_poll.h') +deps += ['timer'] diff --git a/lib/librte_power/rte_power_empty_poll.c b/lib/librte_power/rte_power_empty_poll.c new file mode 100644 index 00000000..e6145462 --- /dev/null +++ b/lib/librte_power/rte_power_empty_poll.c @@ -0,0 +1,545 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +#include <string.h> + +#include <rte_lcore.h> +#include <rte_cycles.h> +#include <rte_atomic.h> +#include <rte_malloc.h> +#include <inttypes.h> + +#include "rte_power.h" +#include "rte_power_empty_poll.h" + +#define INTERVALS_PER_SECOND 100 /* (10ms) */ +#define SECONDS_TO_TRAIN_FOR 2 +#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70 +#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30 +#define DEFAULT_CYCLES_PER_PACKET 800 + 
+static struct ep_params *ep_params; +static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD; +static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD; + +static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS]; + +static uint32_t total_avail_freqs[RTE_MAX_LCORE]; + +static uint32_t freq_index[NUM_FREQ]; + +static uint32_t +get_freq_index(enum freq_val index) +{ + return freq_index[index]; +} + + +static int +set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq) +{ + int err = 0; + uint32_t power_freq_index; + if (!specific_freq) + power_freq_index = get_freq_index(freq); + else + power_freq_index = freq; + + err = rte_power_set_freq(lcore_id, power_freq_index); + + return err; +} + + +static inline void __attribute__((always_inline)) +exit_training_state(struct priority_worker *poll_stats) +{ + RTE_SET_USED(poll_stats); +} + +static inline void __attribute__((always_inline)) +enter_training_state(struct priority_worker *poll_stats) +{ + poll_stats->iter_counter = 0; + poll_stats->cur_freq = LOW; + poll_stats->queue_state = TRAINING; +} + +static inline void __attribute__((always_inline)) +enter_normal_state(struct priority_worker *poll_stats) +{ + /* Clear the averages arrays and strs */ + memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av)); + poll_stats->ec = 0; + memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av)); + poll_stats->pc = 0; + + poll_stats->cur_freq = MED; + poll_stats->iter_counter = 0; + poll_stats->threshold_ctr = 0; + poll_stats->queue_state = MED_NORMAL; + RTE_LOG(INFO, POWER, "Set the power freq to MED\n"); + set_power_freq(poll_stats->lcore_id, MED, false); + + poll_stats->thresh[MED].threshold_percent = med_to_high_threshold; + poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold; +} + +static inline void __attribute__((always_inline)) +enter_busy_state(struct priority_worker *poll_stats) +{ + memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av)); + poll_stats->ec = 0; + memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av)); + poll_stats->pc = 0; + + poll_stats->cur_freq = HGH; + poll_stats->iter_counter = 0; + poll_stats->threshold_ctr = 0; + poll_stats->queue_state = HGH_BUSY; + set_power_freq(poll_stats->lcore_id, HGH, false); +} + +static inline void __attribute__((always_inline)) +enter_purge_state(struct priority_worker *poll_stats) +{ + poll_stats->iter_counter = 0; + poll_stats->queue_state = LOW_PURGE; +} + +static inline void __attribute__((always_inline)) +set_state(struct priority_worker *poll_stats, + enum queue_state new_state) +{ + enum queue_state old_state = poll_stats->queue_state; + if (old_state != new_state) { + + /* Call any old state exit functions */ + if (old_state == TRAINING) + exit_training_state(poll_stats); + + /* Call any new state entry functions */ + if (new_state == TRAINING) + enter_training_state(poll_stats); + if (new_state == MED_NORMAL) + enter_normal_state(poll_stats); + if (new_state == HGH_BUSY) + enter_busy_state(poll_stats); + if (new_state == LOW_PURGE) + enter_purge_state(poll_stats); + } +} + +static inline void __attribute__((always_inline)) +set_policy(struct priority_worker *poll_stats, + struct ep_policy *policy) +{ + set_state(poll_stats, policy->state); + + if (policy->state == TRAINING) + return; + + poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi; + poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi; + + poll_stats->thresh[MED_NORMAL].trained = true; + poll_stats->thresh[HGH_BUSY].trained = 
true; + +} + +static void +update_training_stats(struct priority_worker *poll_stats, + uint32_t freq, + bool specific_freq, + uint32_t max_train_iter) +{ + RTE_SET_USED(specific_freq); + + char pfi_str[32]; + uint64_t p0_empty_deq; + + sprintf(pfi_str, "%02d", freq); + + if (poll_stats->cur_freq == freq && + poll_stats->thresh[freq].trained == false) { + if (poll_stats->thresh[freq].cur_train_iter == 0) { + + set_power_freq(poll_stats->lcore_id, + freq, specific_freq); + + poll_stats->empty_dequeues_prev = + poll_stats->empty_dequeues; + + poll_stats->thresh[freq].cur_train_iter++; + + return; + } else if (poll_stats->thresh[freq].cur_train_iter + <= max_train_iter) { + + p0_empty_deq = poll_stats->empty_dequeues - + poll_stats->empty_dequeues_prev; + + poll_stats->empty_dequeues_prev = + poll_stats->empty_dequeues; + + poll_stats->thresh[freq].base_edpi += p0_empty_deq; + poll_stats->thresh[freq].cur_train_iter++; + + } else { + if (poll_stats->thresh[freq].trained == false) { + poll_stats->thresh[freq].base_edpi = + poll_stats->thresh[freq].base_edpi / + max_train_iter; + + /* Add on a factor of 0.05% + * this should remove any + * false negatives when the system is 0% busy + */ + poll_stats->thresh[freq].base_edpi += + poll_stats->thresh[freq].base_edpi / 2000; + + poll_stats->thresh[freq].trained = true; + poll_stats->cur_freq++; + + } + } + } +} + +static inline uint32_t __attribute__((always_inline)) +update_stats(struct priority_worker *poll_stats) +{ + uint64_t tot_edpi = 0, tot_ppi = 0; + uint32_t j, percent; + + struct priority_worker *s = poll_stats; + + uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev; + + s->empty_dequeues_prev = s->empty_dequeues; + + uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev; + + s->num_dequeue_pkts_prev = s->num_dequeue_pkts; + + if (s->thresh[s->cur_freq].base_edpi < cur_edpi) { + + /* edpi mean empty poll counter difference per interval */ + RTE_LOG(DEBUG, POWER, "cur_edpi is too large " + "cur edpi %"PRId64" " + "base edpi %"PRId64"\n", + cur_edpi, + s->thresh[s->cur_freq].base_edpi); + /* Value to make us fail need debug log*/ + return 1000UL; + } + + s->edpi_av[s->ec++ % BINS_AV] = cur_edpi; + s->ppi_av[s->pc++ % BINS_AV] = ppi; + + for (j = 0; j < BINS_AV; j++) { + tot_edpi += s->edpi_av[j]; + tot_ppi += s->ppi_av[j]; + } + + tot_edpi = tot_edpi / BINS_AV; + + percent = 100 - (uint32_t)(((float)tot_edpi / + (float)s->thresh[s->cur_freq].base_edpi) * 100); + + return (uint32_t)percent; +} + + +static inline void __attribute__((always_inline)) +update_stats_normal(struct priority_worker *poll_stats) +{ + uint32_t percent; + + if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) { + + enum freq_val cur_freq = poll_stats->cur_freq; + + /* edpi mean empty poll counter difference per interval */ + RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n", + cur_freq, + poll_stats->thresh[cur_freq].base_edpi); + return; + } + + percent = update_stats(poll_stats); + + if (percent > 100) { + /* edpi mean empty poll counter difference per interval */ + RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n"); + return; + } + + if (poll_stats->cur_freq == LOW) + RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n"); + else if (poll_stats->cur_freq == MED) { + + if (percent > + poll_stats->thresh[MED].threshold_percent) { + + if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND) + poll_stats->threshold_ctr++; + else { + set_state(poll_stats, HGH_BUSY); + RTE_LOG(INFO, POWER, "MOVE to HGH\n"); + } + + } else { 
+ /* reset */ + poll_stats->threshold_ctr = 0; + } + + } else if (poll_stats->cur_freq == HGH) { + + if (percent < + poll_stats->thresh[HGH].threshold_percent) { + + if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND) + poll_stats->threshold_ctr++; + else { + set_state(poll_stats, MED_NORMAL); + RTE_LOG(INFO, POWER, "MOVE to MED\n"); + } + } else { + /* reset */ + poll_stats->threshold_ctr = 0; + } + + } +} + +static int +empty_poll_training(struct priority_worker *poll_stats, + uint32_t max_train_iter) +{ + + if (poll_stats->iter_counter < INTERVALS_PER_SECOND) { + poll_stats->iter_counter++; + return 0; + } + + + update_training_stats(poll_stats, + LOW, + false, + max_train_iter); + + update_training_stats(poll_stats, + MED, + false, + max_train_iter); + + update_training_stats(poll_stats, + HGH, + false, + max_train_iter); + + + if (poll_stats->thresh[LOW].trained == true + && poll_stats->thresh[MED].trained == true + && poll_stats->thresh[HGH].trained == true) { + + set_state(poll_stats, MED_NORMAL); + + RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n", + poll_stats->thresh[LOW].base_edpi); + + RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n", + poll_stats->thresh[MED].base_edpi); + + + RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n", + poll_stats->thresh[HGH].base_edpi); + + RTE_LOG(INFO, POWER, "Training is Complete for %d\n", + poll_stats->lcore_id); + } + + return 0; +} + +void __rte_experimental +rte_empty_poll_detection(struct rte_timer *tim, void *arg) +{ + + uint32_t i; + + struct priority_worker *poll_stats; + + RTE_SET_USED(tim); + + RTE_SET_USED(arg); + + for (i = 0; i < NUM_NODES; i++) { + + poll_stats = &(ep_params->wrk_data.wrk_stats[i]); + + if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0) + continue; + + switch (poll_stats->queue_state) { + case(TRAINING): + empty_poll_training(poll_stats, + ep_params->max_train_iter); + break; + + case(HGH_BUSY): + case(MED_NORMAL): + update_stats_normal(poll_stats); + break; + + case(LOW_PURGE): + break; + default: + break; + + } + + } + +} + +int __rte_experimental +rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb, + struct ep_policy *policy) +{ + uint32_t i; + /* Allocate the ep_params structure */ + ep_params = rte_zmalloc_socket(NULL, + sizeof(struct ep_params), + 0, + rte_socket_id()); + + if (!ep_params) + return -1; + + if (freq_tlb == NULL) { + freq_index[LOW] = 14; + freq_index[MED] = 9; + freq_index[HGH] = 1; + } else { + freq_index[LOW] = freq_tlb[LOW]; + freq_index[MED] = freq_tlb[MED]; + freq_index[HGH] = freq_tlb[HGH]; + } + + RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n"); + + /* Train for pre-defined period */ + ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR; + + struct stats_data *w = &ep_params->wrk_data; + + *eptr = ep_params; + + /* initialize all wrk_stats state */ + for (i = 0; i < NUM_NODES; i++) { + + if (rte_lcore_is_enabled(i) == 0) + continue; + /*init the freqs table */ + total_avail_freqs[i] = rte_power_freqs(i, + avail_freqs[i], + NUM_FREQS); + + RTE_LOG(INFO, POWER, "total avail freq is %d , lcoreid %d\n", + total_avail_freqs[i], + i); + + if (get_freq_index(LOW) > total_avail_freqs[i]) + return -1; + + if (rte_get_master_lcore() != i) { + w->wrk_stats[i].lcore_id = i; + set_policy(&w->wrk_stats[i], policy); + } + } + + return 0; +} + +void __rte_experimental +rte_power_empty_poll_stat_free(void) +{ + + RTE_LOG(INFO, POWER, "Close the Empty Poll\n"); + + if (ep_params != NULL) + rte_free(ep_params); +} + +int __rte_experimental 
+rte_power_empty_poll_stat_update(unsigned int lcore_id) +{ + struct priority_worker *poll_stats; + + if (lcore_id >= NUM_NODES) + return -1; + + poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]); + + if (poll_stats->lcore_id == 0) + poll_stats->lcore_id = lcore_id; + + poll_stats->empty_dequeues++; + + return 0; +} + +int __rte_experimental +rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt) +{ + + struct priority_worker *poll_stats; + + if (lcore_id >= NUM_NODES) + return -1; + + poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]); + + if (poll_stats->lcore_id == 0) + poll_stats->lcore_id = lcore_id; + + poll_stats->num_dequeue_pkts += nb_pkt; + + return 0; +} + + +uint64_t __rte_experimental +rte_power_empty_poll_stat_fetch(unsigned int lcore_id) +{ + struct priority_worker *poll_stats; + + if (lcore_id >= NUM_NODES) + return -1; + + poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]); + + if (poll_stats->lcore_id == 0) + poll_stats->lcore_id = lcore_id; + + return poll_stats->empty_dequeues; +} + +uint64_t __rte_experimental +rte_power_poll_stat_fetch(unsigned int lcore_id) +{ + struct priority_worker *poll_stats; + + if (lcore_id >= NUM_NODES) + return -1; + + poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]); + + if (poll_stats->lcore_id == 0) + poll_stats->lcore_id = lcore_id; + + return poll_stats->num_dequeue_pkts; +} diff --git a/lib/librte_power/rte_power_empty_poll.h b/lib/librte_power/rte_power_empty_poll.h new file mode 100644 index 00000000..c1ad5c24 --- /dev/null +++ b/lib/librte_power/rte_power_empty_poll.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +#ifndef _RTE_EMPTY_POLL_H +#define _RTE_EMPTY_POLL_H + +/** + * @file + * RTE Power Management + */ +#include <stdint.h> +#include <stdbool.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_string_fns.h> +#include <rte_power.h> +#include <rte_timer.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define NUM_FREQS RTE_MAX_LCORE_FREQS + +#define BINS_AV 4 /* Has to be ^2 */ + +#define DROP (NUM_DIRECTIONS * NUM_DEVICES) + +#define NUM_PRIORITIES 2 + +#define NUM_NODES 256 /* Max core number*/ + +/* Processor Power State */ +enum freq_val { + LOW, + MED, + HGH, + NUM_FREQ = NUM_FREQS +}; + + +/* Queue Polling State */ +enum queue_state { + TRAINING, /* NO TRAFFIC */ + MED_NORMAL, /* MED */ + HGH_BUSY, /* HIGH */ + LOW_PURGE, /* LOW */ +}; + +/* Queue Stats */ +struct freq_threshold { + + uint64_t base_edpi; + bool trained; + uint32_t threshold_percent; + uint32_t cur_train_iter; +}; + +/* Each Worder Thread Empty Poll Stats */ +struct priority_worker { + + /* Current dequeue and throughput counts */ + /* These 2 are written to by the worker threads */ + /* So keep them on their own cache line */ + uint64_t empty_dequeues; + uint64_t num_dequeue_pkts; + + enum queue_state queue_state; + + uint64_t empty_dequeues_prev; + uint64_t num_dequeue_pkts_prev; + + /* Used for training only */ + struct freq_threshold thresh[NUM_FREQ]; + enum freq_val cur_freq; + + /* bucket arrays to calculate the averages */ + /* edpi mean empty poll counter difference per interval */ + uint64_t edpi_av[BINS_AV]; + /* empty poll counter */ + uint32_t ec; + /* ppi mean valid poll counter per interval */ + uint64_t ppi_av[BINS_AV]; + /* valid poll counter */ + uint32_t pc; + + uint32_t lcore_id; + uint32_t iter_counter; + uint32_t threshold_ctr; + uint32_t display_ctr; + uint8_t dev_id; + +} 
__rte_cache_aligned; + + +struct stats_data { + + struct priority_worker wrk_stats[NUM_NODES]; + + /* flag to stop rx threads processing packets until training over */ + bool start_rx; + +}; + +/* Empty Poll Parameters */ +struct ep_params { + + /* Timer related stuff */ + uint64_t interval_ticks; + uint32_t max_train_iter; + + struct rte_timer timer0; + struct stats_data wrk_data; +}; + + +/* Sample App Init information */ +struct ep_policy { + + uint64_t med_base_edpi; + uint64_t hgh_base_edpi; + + enum queue_state state; +}; + + + +/** + * Initialize the power management system. + * + * @param eptr + * the structure of empty poll configuration + * @param freq_tlb + * the power state/frequency mapping table + * @param policy + * the initialization policy from sample app + * + * @return + * - 0 on success. + * - Negative on error. + */ +int __rte_experimental +rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb, + struct ep_policy *policy); + +/** + * Free the resource hold by power management system. + */ +void __rte_experimental +rte_power_empty_poll_stat_free(void); + +/** + * Update specific core empty poll counter + * It's not thread safe. + * + * @param lcore_id + * lcore id + * + * @return + * - 0 on success. + * - Negative on error. + */ +int __rte_experimental +rte_power_empty_poll_stat_update(unsigned int lcore_id); + +/** + * Update specific core valid poll counter, not thread safe. + * + * @param lcore_id + * lcore id. + * @param nb_pkt + * The packet number of one valid poll. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int __rte_experimental +rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt); + +/** + * Fetch specific core empty poll counter. + * + * @param lcore_id + * lcore id + * + * @return + * Current lcore empty poll counter value. + */ +uint64_t __rte_experimental +rte_power_empty_poll_stat_fetch(unsigned int lcore_id); + +/** + * Fetch specific core valid poll counter. + * + * @param lcore_id + * lcore id + * + * @return + * Current lcore valid poll counter value. 
+ */ +uint64_t __rte_experimental +rte_power_poll_stat_fetch(unsigned int lcore_id); + +/** + * Empty poll state change detection function + * + * @param tim + * The timer structure + * @param arg + * The customized parameter + */ +void __rte_experimental +rte_empty_poll_detection(struct rte_timer *tim, void *arg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map index dd587dfb..17a083b2 100644 --- a/lib/librte_power/rte_power_version.map +++ b/lib/librte_power/rte_power_version.map @@ -33,3 +33,16 @@ DPDK_18.08 { rte_power_get_capabilities; } DPDK_17.11; + +EXPERIMENTAL { + global: + + rte_empty_poll_detection; + rte_power_empty_poll_stat_fetch; + rte_power_empty_poll_stat_free; + rte_power_empty_poll_stat_init; + rte_power_empty_poll_stat_update; + rte_power_poll_stat_fetch; + rte_power_poll_stat_update; + +}; diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c index 62b6b97e..9f1e3592 100644 --- a/lib/librte_rawdev/rte_rawdev.c +++ b/lib/librte_rawdev/rte_rawdev.c @@ -35,21 +35,19 @@ /* dynamic log identifier */ int librawdev_logtype; -struct rte_rawdev rte_rawdevices[RTE_RAWDEV_MAX_DEVS]; +static struct rte_rawdev rte_rawdevices[RTE_RAWDEV_MAX_DEVS]; -struct rte_rawdev *rte_rawdevs = &rte_rawdevices[0]; +struct rte_rawdev *rte_rawdevs = rte_rawdevices; static struct rte_rawdev_global rawdev_globals = { .nb_devs = 0 }; -struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals; - /* Raw device, northbound API implementation */ uint8_t rte_rawdev_count(void) { - return rte_rawdev_globals->nb_devs; + return rawdev_globals.nb_devs; } uint16_t @@ -60,7 +58,7 @@ rte_rawdev_get_dev_id(const char *name) if (!name) return -EINVAL; - for (i = 0; i < rte_rawdev_globals->nb_devs; i++) + for (i = 0; i < rawdev_globals.nb_devs; i++) if ((strcmp(rte_rawdevices[i].name, name) == 0) && (rte_rawdevices[i].attached == diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h index bb9bbc35..811e51d0 100644 --- a/lib/librte_rawdev/rte_rawdev_pmd.h +++ b/lib/librte_rawdev/rte_rawdev_pmd.h @@ -73,8 +73,6 @@ struct rte_rawdev_global { uint16_t nb_devs; }; -extern struct rte_rawdev_global *rte_rawdev_globals; -/** Pointer to global raw devices data structure. */ extern struct rte_rawdev *rte_rawdevs; /** The pool of rte_rawdev structures. */ diff --git a/lib/librte_ring/meson.build b/lib/librte_ring/meson.build index ca8a435e..ab8b0b46 100644 --- a/lib/librte_ring/meson.build +++ b/lib/librte_ring/meson.build @@ -1,6 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation +version = 2 sources = files('rte_ring.c') headers = files('rte_ring.h', 'rte_ring_c11_mem.h', diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 7a731d07..af5444a9 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -303,11 +303,11 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r); * There are 2 choices for the users * 1.use rmb() memory barrier * 2.use one-direcion load_acquire/store_release barrier,defined by - * CONFIG_RTE_RING_USE_C11_MEM_MODEL=y + * CONFIG_RTE_USE_C11_MEM_MODEL=y * It depends on performance test results. 
* By default, move common functions to rte_ring_generic.h */ -#ifdef RTE_RING_USE_C11_MEM_MODEL +#ifdef RTE_USE_C11_MEM_MODEL #include "rte_ring_c11_mem.h" #else #include "rte_ring_generic.h" diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index 55d9c698..46c53ed7 100644 --- a/lib/librte_sched/Makefile +++ b/lib/librte_sched/Makefile @@ -11,8 +11,6 @@ LIB = librte_sched.a CFLAGS += -O3 CFLAGS += $(WERROR_FLAGS) -CFLAGS_rte_red.o := -D_GNU_SOURCE - LDLIBS += -lm LDLIBS += -lrt LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_net diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 9269e5c7..587d5e60 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -329,7 +329,7 @@ rte_sched_port_check_params(struct rte_sched_port_params *params) return -1; /* socket */ - if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) + if (params->socket < 0) return -3; /* rate */ @@ -633,7 +633,8 @@ rte_sched_port_config(struct rte_sched_port_params *params) return NULL; /* Allocate memory to store the data structures */ - port = rte_zmalloc("qos_params", mem_size, RTE_CACHE_LINE_SIZE); + port = rte_zmalloc_socket("qos_params", mem_size, RTE_CACHE_LINE_SIZE, + params->socket); if (port == NULL) return NULL; diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c index 1954960a..c6355de9 100644 --- a/lib/librte_security/rte_security.c +++ b/lib/librte_security/rte_security.c @@ -131,6 +131,10 @@ rte_security_capability_get(struct rte_security_ctx *instance, capability->ipsec.direction == idx->ipsec.direction) return capability; + } else if (idx->protocol == RTE_SECURITY_PROTOCOL_PDCP) { + if (capability->pdcp.domain == + idx->pdcp.domain) + return capability; } } } diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h index b0d1b97e..1431b4df 100644 --- a/lib/librte_security/rte_security.h +++ b/lib/librte_security/rte_security.h @@ -207,6 +207,64 @@ struct rte_security_macsec_xform { }; /** + * PDCP Mode of session + */ +enum rte_security_pdcp_domain { + RTE_SECURITY_PDCP_MODE_CONTROL, /**< PDCP control plane */ + RTE_SECURITY_PDCP_MODE_DATA, /**< PDCP data plane */ +}; + +/** PDCP Frame direction */ +enum rte_security_pdcp_direction { + RTE_SECURITY_PDCP_UPLINK, /**< Uplink */ + RTE_SECURITY_PDCP_DOWNLINK, /**< Downlink */ +}; + +/** PDCP Sequence Number Size selectors */ +enum rte_security_pdcp_sn_size { + /** PDCP_SN_SIZE_5: 5bit sequence number */ + RTE_SECURITY_PDCP_SN_SIZE_5 = 5, + /** PDCP_SN_SIZE_7: 7bit sequence number */ + RTE_SECURITY_PDCP_SN_SIZE_7 = 7, + /** PDCP_SN_SIZE_12: 12bit sequence number */ + RTE_SECURITY_PDCP_SN_SIZE_12 = 12, + /** PDCP_SN_SIZE_15: 15bit sequence number */ + RTE_SECURITY_PDCP_SN_SIZE_15 = 15, + /** PDCP_SN_SIZE_18: 18bit sequence number */ + RTE_SECURITY_PDCP_SN_SIZE_18 = 18 +}; + +/** + * PDCP security association configuration data. + * + * This structure contains data required to create a PDCP security session. + */ +struct rte_security_pdcp_xform { + int8_t bearer; /**< PDCP bearer ID */ + /** Enable in order delivery, this field shall be set only if + * driver/HW is capable. See RTE_SECURITY_PDCP_ORDERING_CAP. + */ + uint8_t en_ordering; + /** Notify driver/HW to detect and remove duplicate packets. + * This field should be set only when driver/hw is capable. + * See RTE_SECURITY_PDCP_DUP_DETECT_CAP. 
+ */ + uint8_t remove_duplicates; + /** PDCP mode of operation: Control or data */ + enum rte_security_pdcp_domain domain; + /** PDCP Frame Direction 0:UL 1:DL */ + enum rte_security_pdcp_direction pkt_dir; + /** Sequence number size, 5/7/12/15/18 */ + enum rte_security_pdcp_sn_size sn_size; + /** Starting Hyper Frame Number to be used together with the SN + * from the PDCP frames + */ + uint32_t hfn; + /** HFN Threshold for key renegotiation */ + uint32_t hfn_threshold; +}; + +/** * Security session action type. */ enum rte_security_session_action_type { @@ -232,6 +290,8 @@ enum rte_security_session_protocol { /**< IPsec Protocol */ RTE_SECURITY_PROTOCOL_MACSEC, /**< MACSec Protocol */ + RTE_SECURITY_PROTOCOL_PDCP, + /**< PDCP Protocol */ }; /** @@ -246,6 +306,7 @@ struct rte_security_session_conf { union { struct rte_security_ipsec_xform ipsec; struct rte_security_macsec_xform macsec; + struct rte_security_pdcp_xform pdcp; }; /**< Configuration parameters for security session */ struct rte_crypto_sym_xform *crypto_xform; @@ -413,6 +474,10 @@ struct rte_security_ipsec_stats { }; +struct rte_security_pdcp_stats { + uint64_t reserved; +}; + struct rte_security_stats { enum rte_security_session_protocol protocol; /**< Security protocol to be configured */ @@ -421,6 +486,7 @@ struct rte_security_stats { union { struct rte_security_macsec_stats macsec; struct rte_security_ipsec_stats ipsec; + struct rte_security_pdcp_stats pdcp; }; }; @@ -465,6 +531,13 @@ struct rte_security_capability { int dummy; } macsec; /**< MACsec capability */ + struct { + enum rte_security_pdcp_domain domain; + /**< PDCP mode of operation: Control or data */ + uint32_t capa_flags; + /**< Capabilitity flags, see RTE_SECURITY_PDCP_* */ + } pdcp; + /**< PDCP capability */ }; const struct rte_cryptodev_capabilities *crypto_capabilities; @@ -474,6 +547,19 @@ struct rte_security_capability { /**< Device offload flags */ }; +/** Underlying Hardware/driver which support PDCP may or may not support + * packet ordering. Set RTE_SECURITY_PDCP_ORDERING_CAP if it support. + * If it is not set, driver/HW assumes packets received are in order + * and it will be application's responsibility to maintain ordering. + */ +#define RTE_SECURITY_PDCP_ORDERING_CAP 0x00000001 + +/** Underlying Hardware/driver which support PDCP may or may not detect + * duplicate packet. Set RTE_SECURITY_PDCP_DUP_DETECT_CAP if it support. + * If it is not set, driver/HW assumes there is no duplicate packet received. + */ +#define RTE_SECURITY_PDCP_DUP_DETECT_CAP 0x00000002 + #define RTE_SECURITY_TX_OLOAD_NEED_MDATA 0x00000001 /**< HW needs metadata update, see rte_security_set_pkt_metadata(). 
*/ @@ -506,6 +592,10 @@ struct rte_security_capability_idx { enum rte_security_ipsec_sa_mode mode; enum rte_security_ipsec_sa_direction direction; } ipsec; + struct { + enum rte_security_pdcp_domain domain; + uint32_t capa_flags; + } pdcp; }; }; diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile index 276d476a..f935678a 100644 --- a/lib/librte_table/Makefile +++ b/lib/librte_table/Makefile @@ -46,6 +46,8 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h endif SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_cuckoo.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_func.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_func_arm64.h SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h ifeq ($(CONFIG_RTE_ARCH_X86),y) SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h diff --git a/lib/librte_table/meson.build b/lib/librte_table/meson.build index 8b2f8413..6ae3cd6c 100644 --- a/lib/librte_table/meson.build +++ b/lib/librte_table/meson.build @@ -19,6 +19,8 @@ headers = files('rte_table.h', 'rte_table_lpm_ipv6.h', 'rte_table_hash.h', 'rte_table_hash_cuckoo.h', + 'rte_table_hash_func.h', + 'rte_table_hash_func_arm64.h', 'rte_lru.h', 'rte_table_array.h', 'rte_table_stub.h') diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h new file mode 100644 index 00000000..02296eab --- /dev/null +++ b/lib/librte_table/rte_table_hash_func.h @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2018 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__ +#define __INCLUDE_RTE_TABLE_HASH_FUNC_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#include <rte_compat.h> +#include <rte_common.h> + +static inline uint64_t __rte_experimental +rte_crc32_u64_generic(uint64_t crc, uint64_t value) +{ + int i; + + crc = (crc & 0xFFFFFFFFLLU) ^ value; + for (i = 63; i >= 0; i--) { + uint64_t mask; + + mask = -(crc & 1LLU); + crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask); + } + + return crc; +} + +#if defined(RTE_ARCH_X86_64) + +#include <x86intrin.h> + +static inline uint64_t +rte_crc32_u64(uint64_t crc, uint64_t v) +{ + return _mm_crc32_u64(crc, v); +} + +#elif defined(RTE_ARCH_ARM64) +#include "rte_table_hash_func_arm64.h" +#else + +static inline uint64_t +rte_crc32_u64(uint64_t crc, uint64_t v) +{ + return rte_crc32_u64_generic(crc, v); +} + +#endif + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t crc0; + + crc0 = rte_crc32_u64(seed, k[0] & m[0]); + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, crc0, crc1; + + k0 = k[0] & m[0]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, crc0, crc1; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc0 = rte_crc32_u64(crc0, k2); + + crc0 ^= crc1; + + return crc0; +} + 
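/*
 * The PDCP transform and capability structures added to rte_security.h
 * above plug into the existing session-configuration union. A minimal
 * sketch of creating a downlink data-plane PDCP session, assuming a
 * lookaside-protocol capable device; the security context, cipher
 * transform and session mempool are created elsewhere, and the bearer,
 * HFN and SN-size values are placeholders only.
 */
#include <rte_crypto_sym.h>
#include <rte_mempool.h>
#include <rte_security.h>

static struct rte_security_session *
create_pdcp_data_session(struct rte_security_ctx *sec_ctx,
	struct rte_crypto_sym_xform *cipher_xform,
	struct rte_mempool *sess_mp)
{
	struct rte_security_session_conf conf = {
		.action_type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
		.protocol = RTE_SECURITY_PROTOCOL_PDCP,
		.pdcp = {
			.bearer = 1,
			.domain = RTE_SECURITY_PDCP_MODE_DATA,
			.pkt_dir = RTE_SECURITY_PDCP_DOWNLINK,
			.sn_size = RTE_SECURITY_PDCP_SN_SIZE_12,
			.hfn = 0,
			.hfn_threshold = 0xfff00,	/* placeholder */
		},
		.crypto_xform = cipher_xform,
	};

	return rte_security_session_create(sec_ctx, &conf, sess_mp);
}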
+static inline uint64_t __rte_experimental +rte_table_hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc2 = rte_crc32_u64(k2, k[3] & m[3]); + crc3 = k2 >> 32; + + crc0 = rte_crc32_u64(crc0, crc1); + crc1 = rte_crc32_u64(crc2, crc3); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc2 = rte_crc32_u64(k2, k[3] & m[3]); + crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]); + + crc0 = rte_crc32_u64(crc0, crc1); + crc1 = rte_crc32_u64(crc2, crc3); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + k5 = k[5] & m[5]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc2 = rte_crc32_u64(k2, k[3] & m[3]); + crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]); + + crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = rte_crc32_u64(crc3, k5); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + k5 = k[5] & m[5]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc2 = rte_crc32_u64(k2, k[3] & m[3]); + crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]); + + crc4 = rte_crc32_u64(k5, k[6] & m[6]); + crc5 = k5 >> 32; + + crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = rte_crc32_u64(crc3, (crc4 << 32) ^ crc5); + + crc0 ^= crc1; + + return crc0; +} + +static inline uint64_t __rte_experimental +rte_table_hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) +{ + uint64_t *k = key; + uint64_t *m = mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0] & m[0]; + k2 = k[2] & m[2]; + k5 = k[5] & m[5]; + + crc0 = rte_crc32_u64(k0, seed); + crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]); + + crc2 = rte_crc32_u64(k2, k[3] & m[3]); + crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]); + + crc4 = rte_crc32_u64(k5, k[6] & m[6]); + crc5 = rte_crc32_u64(k5 >> 32, k[7] & m[7]); + + crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2); + crc1 = rte_crc32_u64(crc3, (crc4 << 32) ^ crc5); + + crc0 ^= crc1; + + return crc0; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_hash_func_arm64.h b/lib/librte_table/rte_table_hash_func_arm64.h new file mode 100644 index 00000000..eb04c1ff --- /dev/null +++ b/lib/librte_table/rte_table_hash_func_arm64.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017-2018 Linaro Limited + */ + +#ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_ARM64_H__ +#define __INCLUDE_RTE_TABLE_HASH_FUNC_ARM64_H__ + 
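/*
 * A short sketch of calling the masked CRC hash helpers defined above.
 * The 16-byte key layout and mask are illustrative only; in a pipeline
 * these functions are normally not called directly but passed as the
 * hash callback (f_hash) of the table creation parameters, e.g. for the
 * hash tables declared in rte_table_hash.h.
 */
#include <stdint.h>
#include <rte_table_hash_func.h>

static uint64_t
hash_16_byte_key(void)
{
	/* Two 64-bit words packed by the caller, e.g. addresses + ports. */
	uint64_t key[2]  = { 0xc0a8000100000050ULL, 0x0a0000020000115cULL };
	/* The mask zeroes out bytes that must not influence the hash. */
	uint64_t mask[2] = { UINT64_MAX, 0xffffffff0000ffffULL };

	return rte_table_hash_crc_key16(key, mask, 16 /* key size */, 0 /* seed */);
}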
+#define _CRC32CX(crc, val) \ + __asm__("crc32cx %w[c], %w[c], %x[v]":[c] "+r" (crc):[v] "r" (val)) + +static inline uint64_t +rte_crc32_u64(uint64_t crc, uint64_t v) +{ + uint32_t crc32 = crc; + + _CRC32CX(crc32, v); + + return crc32; +} + +#endif diff --git a/lib/librte_telemetry/Makefile b/lib/librte_telemetry/Makefile new file mode 100644 index 00000000..1a050691 --- /dev/null +++ b/lib/librte_telemetry/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_telemetry.a + +CFLAGS += -O3 +CFLAGS += -I$(SRCDIR) +CFLAGS += -DALLOW_EXPERIMENTAL_API + +LDLIBS += -lrte_eal -lrte_ethdev +LDLIBS += -lrte_metrics +LDLIBS += -lpthread +LDLIBS += -ljansson + +EXPORT_MAP := rte_telemetry_version.map + +LIBABIVER := 1 + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) := rte_telemetry.c +SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += rte_telemetry_parser.c +SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += rte_telemetry_parser_test.c + +# export include files +SYMLINK-$(CONFIG_RTE_LIBRTE_TELEMETRY)-include := rte_telemetry.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_telemetry/meson.build b/lib/librte_telemetry/meson.build new file mode 100644 index 00000000..9492f544 --- /dev/null +++ b/lib/librte_telemetry/meson.build @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Intel Corporation + +sources = files('rte_telemetry.c', 'rte_telemetry_parser.c', 'rte_telemetry_parser_test.c') +headers = files('rte_telemetry.h', 'rte_telemetry_internal.h', 'rte_telemetry_parser.h', 'rte_telemetry_parser_test.h') +deps += ['metrics', 'ethdev'] +cflags += '-DALLOW_EXPERIMENTAL_API' + +jansson = cc.find_library('jansson', required: false) +if jansson.found() + ext_deps += jansson + dpdk_app_link_libraries += ['telemetry'] +else + build = false +endif diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c new file mode 100644 index 00000000..016431f1 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry.c @@ -0,0 +1,1813 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <unistd.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <jansson.h> + +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_metrics.h> +#include <rte_option.h> +#include <rte_string_fns.h> + +#include "rte_telemetry.h" +#include "rte_telemetry_internal.h" +#include "rte_telemetry_parser.h" +#include "rte_telemetry_parser_test.h" +#include "rte_telemetry_socket_tests.h" + +#define BUF_SIZE 1024 +#define ACTION_POST 1 +#define SLEEP_TIME 10 + +#define SELFTEST_VALID_CLIENT "/var/run/dpdk/valid_client" +#define SELFTEST_INVALID_CLIENT "/var/run/dpdk/invalid_client" +#define SOCKET_TEST_CLIENT_PATH "/var/run/dpdk/client" + +static telemetry_impl *static_telemetry; + +struct telemetry_message_test { + char *test_name; + int (*test_func_ptr)(struct telemetry_impl *telemetry, int fd); +}; + +struct json_data { + char *status_code; + char *data; + int port; + char *stat_name; + int stat_value; +}; + +static void +rte_telemetry_get_runtime_dir(char *socket_path, size_t size) +{ + snprintf(socket_path, size, "%s/telemetry", rte_eal_get_runtime_dir()); +} + +int32_t +rte_telemetry_is_port_active(int port_id) +{ + int ret; + + ret = rte_eth_find_next(port_id); + if (ret == port_id) + return 1; + + TELEMETRY_LOG_ERR("port_id: %d is invalid, not active", + 
port_id); + + return 0; +} + +static int32_t +rte_telemetry_update_metrics_ethdev(struct telemetry_impl *telemetry, + uint16_t port_id, int reg_start_index) +{ + int ret, num_xstats, i; + struct rte_eth_xstat *eth_xstats; + + if (!rte_eth_dev_is_valid_port(port_id)) { + TELEMETRY_LOG_ERR("port_id: %d is invalid", port_id); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + ret = rte_telemetry_is_port_active(port_id); + if (ret < 1) { + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + num_xstats = rte_eth_xstats_get(port_id, NULL, 0); + if (num_xstats < 0) { + TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) failed: %d", port_id, + num_xstats); + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats); + if (eth_xstats == NULL) { + TELEMETRY_LOG_ERR("Failed to malloc memory for xstats"); + ret = rte_telemetry_send_error_response(telemetry, -ENOMEM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats); + if (ret < 0 || ret > num_xstats) { + free(eth_xstats); + TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) len%i failed: %d", + port_id, num_xstats, ret); + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + uint64_t xstats_values[num_xstats]; + for (i = 0; i < num_xstats; i++) + xstats_values[i] = eth_xstats[i].value; + + ret = rte_metrics_update_values(port_id, reg_start_index, xstats_values, + num_xstats); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not update metrics values"); + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + free(eth_xstats); + return -1; + } + + free(eth_xstats); + return 0; +} + +int32_t +rte_telemetry_write_to_socket(struct telemetry_impl *telemetry, + const char *json_string) +{ + int ret; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Could not initialise TELEMETRY_API"); + return -1; + } + + if (telemetry->request_client == NULL) { + TELEMETRY_LOG_ERR("No client has been chosen to write to"); + return -1; + } + + if (json_string == NULL) { + TELEMETRY_LOG_ERR("Invalid JSON string!"); + return -1; + } + + ret = send(telemetry->request_client->fd, + json_string, strlen(json_string), 0); + if (ret < 0) { + TELEMETRY_LOG_ERR("Failed to write to socket for client: %s", + telemetry->request_client->file_path); + return -1; + } + + return 0; +} + +int32_t +rte_telemetry_send_error_response(struct telemetry_impl *telemetry, + int error_type) +{ + int ret; + const char *status_code, *json_buffer; + json_t *root; + + if (error_type == -EPERM) + status_code = "Status Error: Unknown"; + else if (error_type == -EINVAL) + status_code = "Status Error: Invalid Argument 404"; + else if (error_type == -ENOMEM) + status_code = "Status Error: Memory Allocation Error"; + else { + TELEMETRY_LOG_ERR("Invalid error type"); + return -EINVAL; + } + + root = json_object(); + + if (root == NULL) { + TELEMETRY_LOG_ERR("Could not create root JSON object"); + return -EPERM; + } + + ret = json_object_set_new(root, "status_code", json_string(status_code)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Status code field cannot be set"); + 
json_decref(root); + return -EPERM; + } + + ret = json_object_set_new(root, "data", json_null()); + if (ret < 0) { + TELEMETRY_LOG_ERR("Data field cannot be set"); + json_decref(root); + return -EPERM; + } + + json_buffer = json_dumps(root, 0); + json_decref(root); + + ret = rte_telemetry_write_to_socket(telemetry, json_buffer); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not write to socket"); + return -EPERM; + } + + return 0; +} + +static int +rte_telemetry_get_metrics(struct telemetry_impl *telemetry, uint32_t port_id, + struct rte_metric_value *metrics, struct rte_metric_name *names, + int num_metrics) +{ + int ret, num_values; + + if (num_metrics < 0) { + TELEMETRY_LOG_ERR("Invalid metrics count"); + goto einval_fail; + } else if (num_metrics == 0) { + TELEMETRY_LOG_ERR("No metrics to display (none have been registered)"); + goto eperm_fail; + } + + if (metrics == NULL) { + TELEMETRY_LOG_ERR("Metrics must be initialised."); + goto einval_fail; + } + + if (names == NULL) { + TELEMETRY_LOG_ERR("Names must be initialised."); + goto einval_fail; + } + + ret = rte_metrics_get_names(names, num_metrics); + if (ret < 0 || ret > num_metrics) { + TELEMETRY_LOG_ERR("Cannot get metrics names"); + goto eperm_fail; + } + + num_values = rte_metrics_get_values(port_id, NULL, 0); + ret = rte_metrics_get_values(port_id, metrics, num_values); + if (ret < 0 || ret > num_values) { + TELEMETRY_LOG_ERR("Cannot get metrics values"); + goto eperm_fail; + } + + return 0; + +eperm_fail: + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +} + +static int32_t +rte_telemetry_json_format_stat(struct telemetry_impl *telemetry, json_t *stats, + const char *metric_name, uint64_t metric_value) +{ + int ret; + json_t *stat = json_object(); + + if (stat == NULL) { + TELEMETRY_LOG_ERR("Could not create stat JSON object"); + goto eperm_fail; + } + + ret = json_object_set_new(stat, "name", json_string(metric_name)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Stat Name field cannot be set"); + goto eperm_fail; + } + + ret = json_object_set_new(stat, "value", json_integer(metric_value)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Stat Value field cannot be set"); + goto eperm_fail; + } + + ret = json_array_append_new(stats, stat); + if (ret < 0) { + TELEMETRY_LOG_ERR("Stat cannot be added to stats json array"); + goto eperm_fail; + } + + return 0; + +eperm_fail: + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +} + +static int32_t +rte_telemetry_json_format_port(struct telemetry_impl *telemetry, + uint32_t port_id, json_t *ports, uint32_t *metric_ids, + uint32_t num_metric_ids) +{ + struct rte_metric_value *metrics = 0; + struct rte_metric_name *names = 0; + int num_metrics, ret, err_ret; + json_t *port, *stats; + uint32_t i; + + num_metrics = rte_metrics_get_names(NULL, 0); + if (num_metrics < 0) { + TELEMETRY_LOG_ERR("Cannot get metrics count"); + goto einval_fail; + } else if (num_metrics == 0) { + TELEMETRY_LOG_ERR("No metrics to display (none have been registered)"); + goto eperm_fail; + } + + metrics = malloc(sizeof(struct rte_metric_value) * num_metrics); + names = malloc(sizeof(struct rte_metric_name) * num_metrics); + if (metrics == NULL || names == NULL) { + TELEMETRY_LOG_ERR("Cannot allocate memory"); + 
free(metrics); + free(names); + + err_ret = rte_telemetry_send_error_response(telemetry, -ENOMEM); + if (err_ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + ret = rte_telemetry_get_metrics(telemetry, port_id, metrics, names, + num_metrics); + if (ret < 0) { + free(metrics); + free(names); + TELEMETRY_LOG_ERR("rte_telemetry_get_metrics failed"); + return -1; + } + + port = json_object(); + stats = json_array(); + if (port == NULL || stats == NULL) { + TELEMETRY_LOG_ERR("Could not create port/stats JSON objects"); + goto eperm_fail; + } + + ret = json_object_set_new(port, "port", json_integer(port_id)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Port field cannot be set"); + goto eperm_fail; + } + + for (i = 0; i < num_metric_ids; i++) { + int metric_id = metric_ids[i]; + int metric_index = -1; + int metric_name_key = -1; + int32_t j; + uint64_t metric_value; + + if (metric_id >= num_metrics) { + TELEMETRY_LOG_ERR("Metric_id: %d is not valid", + metric_id); + goto einval_fail; + } + + for (j = 0; j < num_metrics; j++) { + if (metrics[j].key == metric_id) { + metric_name_key = metrics[j].key; + metric_index = j; + break; + } + } + + const char *metric_name = names[metric_name_key].name; + metric_value = metrics[metric_index].value; + + if (metric_name_key < 0 || metric_index < 0) { + TELEMETRY_LOG_ERR("Could not get metric name/index"); + goto eperm_fail; + } + + ret = rte_telemetry_json_format_stat(telemetry, stats, + metric_name, metric_value); + if (ret < 0) { + TELEMETRY_LOG_ERR("Format stat with id: %u failed", + metric_id); + free(metrics); + free(names); + return -1; + } + } + + if (json_array_size(stats) == 0) + ret = json_object_set_new(port, "stats", json_null()); + else + ret = json_object_set_new(port, "stats", stats); + + if (ret < 0) { + TELEMETRY_LOG_ERR("Stats object cannot be set"); + goto eperm_fail; + } + + ret = json_array_append_new(ports, port); + if (ret < 0) { + TELEMETRY_LOG_ERR("Port object cannot be added to ports array"); + goto eperm_fail; + } + + free(metrics); + free(names); + return 0; + +eperm_fail: + free(metrics); + free(names); + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +einval_fail: + free(metrics); + free(names); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +static int32_t +rte_telemetry_encode_json_format(struct telemetry_impl *telemetry, + uint32_t *port_ids, uint32_t num_port_ids, uint32_t *metric_ids, + uint32_t num_metric_ids, char **json_buffer) +{ + int ret; + json_t *root, *ports; + uint32_t i; + + if (num_port_ids <= 0 || num_metric_ids <= 0) { + TELEMETRY_LOG_ERR("Please provide port and metric ids to query"); + goto einval_fail; + } + + ports = json_array(); + if (ports == NULL) { + TELEMETRY_LOG_ERR("Could not create ports JSON array"); + goto eperm_fail; + } + + for (i = 0; i < num_port_ids; i++) { + if (!rte_eth_dev_is_valid_port(port_ids[i])) { + TELEMETRY_LOG_ERR("Port: %d invalid", port_ids[i]); + goto einval_fail; + } + } + + for (i = 0; i < num_port_ids; i++) { + ret = rte_telemetry_json_format_port(telemetry, port_ids[i], + ports, metric_ids, num_metric_ids); + if (ret < 0) { + TELEMETRY_LOG_ERR("Format port in JSON failed"); + return -1; + } + } + + root = json_object(); + if (root == NULL) { + TELEMETRY_LOG_ERR("Could not create root JSON object"); + goto eperm_fail; + } + + ret = json_object_set_new(root, "status_code", + 
json_string("Status OK: 200")); + if (ret < 0) { + TELEMETRY_LOG_ERR("Status code field cannot be set"); + goto eperm_fail; + } + + ret = json_object_set_new(root, "data", ports); + if (ret < 0) { + TELEMETRY_LOG_ERR("Data field cannot be set"); + goto eperm_fail; + } + + *json_buffer = json_dumps(root, JSON_INDENT(2)); + json_decref(root); + return 0; + +eperm_fail: + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +int32_t +rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids, + uint32_t *port_ids, int num_port_ids, struct telemetry_impl *telemetry) +{ + int ret, i; + char *json_buffer = NULL; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (metric_ids == NULL) { + TELEMETRY_LOG_ERR("Invalid metric_ids array"); + goto einval_fail; + } + + if (num_metric_ids < 0) { + TELEMETRY_LOG_ERR("Invalid num_metric_ids, must be positive"); + goto einval_fail; + } + + if (port_ids == NULL) { + TELEMETRY_LOG_ERR("Invalid port_ids array"); + goto einval_fail; + } + + if (num_port_ids < 0) { + TELEMETRY_LOG_ERR("Invalid num_port_ids, must be positive"); + goto einval_fail; + } + + for (i = 0; i < num_port_ids; i++) { + if (!rte_eth_dev_is_valid_port(port_ids[i])) { + TELEMETRY_LOG_ERR("Port: %d invalid", port_ids[i]); + goto einval_fail; + } + + ret = rte_telemetry_update_metrics_ethdev(telemetry, + port_ids[i], telemetry->reg_index); + if (ret < 0) { + TELEMETRY_LOG_ERR("Failed to update ethdev metrics"); + return -1; + } + } + + ret = rte_telemetry_encode_json_format(telemetry, port_ids, + num_port_ids, metric_ids, num_metric_ids, &json_buffer); + if (ret < 0) { + TELEMETRY_LOG_ERR("JSON encode function failed"); + return -1; + } + + ret = rte_telemetry_write_to_socket(telemetry, json_buffer); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not write to socket"); + return -1; + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + + +static int32_t +rte_telemetry_reg_ethdev_to_metrics(uint16_t port_id) +{ + int ret, num_xstats, ret_val, i; + struct rte_eth_xstat *eth_xstats = NULL; + struct rte_eth_xstat_name *eth_xstats_names = NULL; + + if (!rte_eth_dev_is_valid_port(port_id)) { + TELEMETRY_LOG_ERR("port_id: %d is invalid", port_id); + return -EINVAL; + } + + num_xstats = rte_eth_xstats_get(port_id, NULL, 0); + if (num_xstats < 0) { + TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) failed: %d", + port_id, num_xstats); + return -EPERM; + } + + eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats); + if (eth_xstats == NULL) { + TELEMETRY_LOG_ERR("Failed to malloc memory for xstats"); + return -ENOMEM; + } + + ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats); + const char *xstats_names[num_xstats]; + eth_xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * num_xstats); + if (ret < 0 || ret > num_xstats) { + TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) len%i failed: %d", + port_id, num_xstats, ret); + ret_val = -EPERM; + goto free_xstats; + } + + if (eth_xstats_names == NULL) { + TELEMETRY_LOG_ERR("Failed to malloc memory for xstats_names"); + ret_val = -ENOMEM; + goto free_xstats; + } + + ret = rte_eth_xstats_get_names(port_id, eth_xstats_names, num_xstats); 
+ if (ret < 0 || ret > num_xstats) { + TELEMETRY_LOG_ERR("rte_eth_xstats_get_names(%u) len%i failed: %d", + port_id, num_xstats, ret); + ret_val = -EPERM; + goto free_xstats; + } + + for (i = 0; i < num_xstats; i++) + xstats_names[i] = eth_xstats_names[eth_xstats[i].id].name; + + ret_val = rte_metrics_reg_names(xstats_names, num_xstats); + if (ret_val < 0) { + TELEMETRY_LOG_ERR("rte_metrics_reg_names failed - metrics may already be registered"); + ret_val = -1; + goto free_xstats; + } + + goto free_xstats; + +free_xstats: + free(eth_xstats); + free(eth_xstats_names); + return ret_val; +} + +static int32_t +rte_telemetry_initial_accept(struct telemetry_impl *telemetry) +{ + uint16_t pid; + int ret; + int selftest = 0; + + RTE_ETH_FOREACH_DEV(pid) { + telemetry->reg_index = rte_telemetry_reg_ethdev_to_metrics(pid); + break; + } + + if (telemetry->reg_index < 0) { + TELEMETRY_LOG_ERR("Failed to register ethdev metrics"); + return -1; + } + + telemetry->metrics_register_done = 1; + if (selftest) { + ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index, + telemetry->server_fd); + if (ret < 0) + return -1; + + ret = rte_telemetry_parser_test(telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Parser Tests Failed"); + return -1; + } + + TELEMETRY_LOG_INFO("Success - All Parser Tests Passed"); + } + + return 0; +} + +static int32_t +rte_telemetry_read_client(struct telemetry_impl *telemetry) +{ + char buf[BUF_SIZE]; + int ret, buffer_read; + + buffer_read = read(telemetry->accept_fd, buf, BUF_SIZE-1); + + if (buffer_read == -1) { + TELEMETRY_LOG_ERR("Read error"); + return -1; + } else if (buffer_read == 0) { + goto close_socket; + } else { + buf[buffer_read] = '\0'; + ret = rte_telemetry_parse_client_message(telemetry, buf); + if (ret < 0) + TELEMETRY_LOG_WARN("Parse message failed"); + goto close_socket; + } + +close_socket: + if (close(telemetry->accept_fd) < 0) { + TELEMETRY_LOG_ERR("Close TELEMETRY socket failed"); + free(telemetry); + return -EPERM; + } + telemetry->accept_fd = 0; + + return 0; +} + +static int32_t +rte_telemetry_accept_new_client(struct telemetry_impl *telemetry) +{ + int ret; + + if (telemetry->accept_fd <= 0) { + ret = listen(telemetry->server_fd, 1); + if (ret < 0) { + TELEMETRY_LOG_ERR("Listening error with server fd"); + return -1; + } + + telemetry->accept_fd = accept(telemetry->server_fd, NULL, NULL); + if (telemetry->accept_fd >= 0 && + telemetry->metrics_register_done == 0) { + ret = rte_telemetry_initial_accept(telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Failed to run initial configurations/tests"); + return -1; + } + } + } else { + ret = rte_telemetry_read_client(telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Failed to read socket buffer"); + return -1; + } + } + + return 0; +} + +static int32_t +rte_telemetry_read_client_sockets(struct telemetry_impl *telemetry) +{ + int ret; + telemetry_client *client; + char client_buf[BUF_SIZE]; + int bytes; + + TAILQ_FOREACH(client, &telemetry->client_list_head, client_list) { + bytes = read(client->fd, client_buf, BUF_SIZE-1); + + if (bytes > 0) { + client_buf[bytes] = '\0'; + telemetry->request_client = client; + ret = rte_telemetry_parse(telemetry, client_buf); + if (ret < 0) { + TELEMETRY_LOG_WARN("Parse socket input failed: %i", + ret); + return -1; + } + } + } + + return 0; +} + +static int32_t +rte_telemetry_run(void *userdata) +{ + int ret; + struct telemetry_impl *telemetry = userdata; + + if (telemetry == NULL) { + TELEMETRY_LOG_WARN("TELEMETRY could not be initialised"); + return -1; + } + + 
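	/*
	 * Each pass first services the listening socket: accept a new
	 * connection (the first accept also registers the ethdev xstats with
	 * the metrics library), or consume any request pending on it, and
	 * then poll every registered client socket for JSON queries.
	 */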
ret = rte_telemetry_accept_new_client(telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Accept and read new client failed"); + return -1; + } + + ret = rte_telemetry_read_client_sockets(telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Client socket read failed"); + return -1; + } + + return 0; +} + +static void +*rte_telemetry_run_thread_func(void *userdata) +{ + int ret; + struct telemetry_impl *telemetry = userdata; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("%s passed a NULL instance", __func__); + pthread_exit(0); + } + + while (telemetry->thread_status) { + rte_telemetry_run(telemetry); + ret = usleep(SLEEP_TIME); + if (ret < 0) + TELEMETRY_LOG_ERR("Calling thread could not be put to sleep"); + } + pthread_exit(0); +} + +static int32_t +rte_telemetry_set_socket_nonblock(int fd) +{ + int flags; + + if (fd < 0) { + TELEMETRY_LOG_ERR("Invalid fd provided"); + return -1; + } + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) + flags = 0; + + return fcntl(fd, F_SETFL, flags | O_NONBLOCK); +} + +static int32_t +rte_telemetry_create_socket(struct telemetry_impl *telemetry) +{ + int ret; + struct sockaddr_un addr; + char socket_path[BUF_SIZE]; + + if (telemetry == NULL) + return -1; + + telemetry->server_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (telemetry->server_fd == -1) { + TELEMETRY_LOG_ERR("Failed to open socket"); + return -1; + } + + ret = rte_telemetry_set_socket_nonblock(telemetry->server_fd); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not set socket to NONBLOCK"); + goto close_socket; + } + + addr.sun_family = AF_UNIX; + rte_telemetry_get_runtime_dir(socket_path, sizeof(socket_path)); + strlcpy(addr.sun_path, socket_path, sizeof(addr.sun_path)); + unlink(socket_path); + + if (bind(telemetry->server_fd, (struct sockaddr *)&addr, + sizeof(addr)) < 0) { + TELEMETRY_LOG_ERR("Socket binding error"); + goto close_socket; + } + + return 0; + +close_socket: + if (close(telemetry->server_fd) < 0) { + TELEMETRY_LOG_ERR("Close TELEMETRY socket failed"); + return -EPERM; + } + + return -1; +} + +int32_t __rte_experimental +rte_telemetry_init() +{ + int ret; + pthread_attr_t attr; + const char *telemetry_ctrl_thread = "telemetry"; + + if (static_telemetry) { + TELEMETRY_LOG_WARN("TELEMETRY structure already initialised"); + return -EALREADY; + } + + static_telemetry = calloc(1, sizeof(struct telemetry_impl)); + if (static_telemetry == NULL) { + TELEMETRY_LOG_ERR("Memory could not be allocated"); + return -ENOMEM; + } + + static_telemetry->socket_id = rte_socket_id(); + rte_metrics_init(static_telemetry->socket_id); + + ret = pthread_attr_init(&attr); + if (ret != 0) { + TELEMETRY_LOG_ERR("Pthread attribute init failed"); + return -EPERM; + } + + ret = rte_telemetry_create_socket(static_telemetry); + if (ret < 0) { + ret = rte_telemetry_cleanup(); + if (ret < 0) + TELEMETRY_LOG_ERR("TELEMETRY cleanup failed"); + return -EPERM; + } + TAILQ_INIT(&static_telemetry->client_list_head); + + ret = rte_ctrl_thread_create(&static_telemetry->thread_id, + telemetry_ctrl_thread, &attr, rte_telemetry_run_thread_func, + (void *)static_telemetry); + static_telemetry->thread_status = 1; + + if (ret < 0) { + ret = rte_telemetry_cleanup(); + if (ret < 0) + TELEMETRY_LOG_ERR("TELEMETRY cleanup failed"); + return -EPERM; + } + + return 0; +} + +static int32_t +rte_telemetry_client_cleanup(struct telemetry_client *client) +{ + int ret; + + ret = close(client->fd); + free(client->file_path); + free(client); + + if (ret < 0) { + TELEMETRY_LOG_ERR("Close client socket failed"); + return -EPERM; + } + + 
return 0; +} + +int32_t __rte_experimental +rte_telemetry_cleanup(void) +{ + int ret; + struct telemetry_impl *telemetry = static_telemetry; + telemetry_client *client, *temp_client; + + TAILQ_FOREACH_SAFE(client, &telemetry->client_list_head, client_list, + temp_client) { + TAILQ_REMOVE(&telemetry->client_list_head, client, client_list); + ret = rte_telemetry_client_cleanup(client); + if (ret < 0) { + TELEMETRY_LOG_ERR("Client cleanup failed"); + return -EPERM; + } + } + + ret = close(telemetry->server_fd); + if (ret < 0) { + TELEMETRY_LOG_ERR("Close TELEMETRY socket failed"); + free(telemetry); + return -EPERM; + } + + telemetry->thread_status = 0; + pthread_join(telemetry->thread_id, NULL); + free(telemetry); + static_telemetry = NULL; + + return 0; +} + +int32_t +rte_telemetry_unregister_client(struct telemetry_impl *telemetry, + const char *client_path) +{ + int ret; + telemetry_client *client, *temp_client; + + if (telemetry == NULL) { + TELEMETRY_LOG_WARN("TELEMETRY is not initialised"); + return -ENODEV; + } + + if (client_path == NULL) { + TELEMETRY_LOG_ERR("Invalid client path"); + goto einval_fail; + } + + if (TAILQ_EMPTY(&telemetry->client_list_head)) { + TELEMETRY_LOG_ERR("There are no clients currently registered"); + return -EPERM; + } + + TAILQ_FOREACH_SAFE(client, &telemetry->client_list_head, client_list, + temp_client) { + if (strcmp(client_path, client->file_path) == 0) { + TAILQ_REMOVE(&telemetry->client_list_head, client, + client_list); + ret = rte_telemetry_client_cleanup(client); + + if (ret < 0) { + TELEMETRY_LOG_ERR("Client cleanup failed"); + return -EPERM; + } + + return 0; + } + } + + TELEMETRY_LOG_WARN("Couldn't find client, possibly not registered yet."); + return -1; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -EINVAL; +} + +int32_t +rte_telemetry_register_client(struct telemetry_impl *telemetry, + const char *client_path) +{ + int ret, fd; + struct sockaddr_un addrs; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Could not initialize TELEMETRY API"); + return -ENODEV; + } + + if (client_path == NULL) { + TELEMETRY_LOG_ERR("Invalid client path"); + return -EINVAL; + } + + telemetry_client *client; + TAILQ_FOREACH(client, &telemetry->client_list_head, client_list) { + if (strcmp(client_path, client->file_path) == 0) { + TELEMETRY_LOG_WARN("'%s' already registered", + client_path); + return -EINVAL; + } + } + + fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (fd == -1) { + TELEMETRY_LOG_ERR("Client socket error"); + return -EACCES; + } + + ret = rte_telemetry_set_socket_nonblock(fd); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not set socket to NONBLOCK"); + return -EPERM; + } + + addrs.sun_family = AF_UNIX; + strlcpy(addrs.sun_path, client_path, sizeof(addrs.sun_path)); + telemetry_client *new_client = malloc(sizeof(telemetry_client)); + new_client->file_path = strdup(client_path); + new_client->fd = fd; + + if (connect(fd, (struct sockaddr *)&addrs, sizeof(addrs)) == -1) { + TELEMETRY_LOG_ERR("TELEMETRY client connect to %s didn't work", + client_path); + ret = rte_telemetry_client_cleanup(new_client); + if (ret < 0) { + TELEMETRY_LOG_ERR("Client cleanup failed"); + return -EPERM; + } + return -EINVAL; + } + + TAILQ_INSERT_HEAD(&telemetry->client_list_head, new_client, client_list); + + return 0; +} + +int32_t +rte_telemetry_parse_client_message(struct telemetry_impl *telemetry, char *buf) +{ + int ret, action_int; + json_error_t error; + json_t *root = 
json_loads(buf, 0, &error); + + if (root == NULL) { + TELEMETRY_LOG_WARN("Could not load JSON object from data passed in : %s", + error.text); + goto fail; + } else if (!json_is_object(root)) { + TELEMETRY_LOG_WARN("JSON Request is not a JSON object"); + goto fail; + } + + json_t *action = json_object_get(root, "action"); + if (action == NULL) { + TELEMETRY_LOG_WARN("Request does not have action field"); + goto fail; + } else if (!json_is_integer(action)) { + TELEMETRY_LOG_WARN("Action value is not an integer"); + goto fail; + } + + json_t *command = json_object_get(root, "command"); + if (command == NULL) { + TELEMETRY_LOG_WARN("Request does not have command field"); + goto fail; + } else if (!json_is_string(command)) { + TELEMETRY_LOG_WARN("Command value is not a string"); + goto fail; + } + + action_int = json_integer_value(action); + if (action_int != ACTION_POST) { + TELEMETRY_LOG_WARN("Invalid action code"); + goto fail; + } + + if (strcmp(json_string_value(command), "clients") != 0) { + TELEMETRY_LOG_WARN("Invalid command"); + goto fail; + } + + json_t *data = json_object_get(root, "data"); + if (data == NULL) { + TELEMETRY_LOG_WARN("Request does not have data field"); + goto fail; + } + + json_t *client_path = json_object_get(data, "client_path"); + if (client_path == NULL) { + TELEMETRY_LOG_WARN("Request does not have client_path field"); + goto fail; + } + + if (!json_is_string(client_path)) { + TELEMETRY_LOG_WARN("Client_path value is not a string"); + goto fail; + } + + ret = rte_telemetry_register_client(telemetry, + json_string_value(client_path)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not register client"); + telemetry->register_fail_count++; + goto fail; + } + + return 0; + +fail: + TELEMETRY_LOG_WARN("Client attempted to register with invalid message"); + json_decref(root); + return -1; +} + +int32_t +rte_telemetry_dummy_client_socket(const char *valid_client_path) +{ + int sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + struct sockaddr_un addr = {0}; + + if (sockfd < 0) { + TELEMETRY_LOG_ERR("Test socket creation failure"); + return -1; + } + + addr.sun_family = AF_UNIX; + strlcpy(addr.sun_path, valid_client_path, sizeof(addr.sun_path)); + unlink(valid_client_path); + + if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + TELEMETRY_LOG_ERR("Test socket binding failure"); + return -1; + } + + if (listen(sockfd, 1) < 0) { + TELEMETRY_LOG_ERR("Listen failure"); + return -1; + } + + return sockfd; +} + +int32_t __rte_experimental +rte_telemetry_selftest(void) +{ + const char *invalid_client_path = SELFTEST_INVALID_CLIENT; + const char *valid_client_path = SELFTEST_VALID_CLIENT; + int ret, sockfd; + + TELEMETRY_LOG_INFO("Selftest"); + + ret = rte_telemetry_init(); + if (ret < 0) { + TELEMETRY_LOG_ERR("Valid initialisation test failed"); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Valid initialisation test passed"); + + ret = rte_telemetry_init(); + if (ret != -EALREADY) { + TELEMETRY_LOG_ERR("Invalid initialisation test failed"); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Invalid initialisation test passed"); + + ret = rte_telemetry_unregister_client(static_telemetry, + invalid_client_path); + if (ret != -EPERM) { + TELEMETRY_LOG_ERR("Invalid unregister test failed"); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Invalid unregister test passed"); + + sockfd = rte_telemetry_dummy_client_socket(valid_client_path); + if (sockfd < 0) { + TELEMETRY_LOG_ERR("Test socket creation failed"); + return -1; + } + + ret = 
rte_telemetry_register_client(static_telemetry, valid_client_path); + if (ret != 0) { + TELEMETRY_LOG_ERR("Valid register test failed: %i", ret); + return -1; + } + + accept(sockfd, NULL, NULL); + TELEMETRY_LOG_INFO("Success - Valid register test passed"); + + ret = rte_telemetry_register_client(static_telemetry, valid_client_path); + if (ret != -EINVAL) { + TELEMETRY_LOG_ERR("Invalid register test failed: %i", ret); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Invalid register test passed"); + + ret = rte_telemetry_unregister_client(static_telemetry, + invalid_client_path); + if (ret != -1) { + TELEMETRY_LOG_ERR("Invalid unregister test failed: %i", ret); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Invalid unregister test passed"); + + ret = rte_telemetry_unregister_client(static_telemetry, valid_client_path); + if (ret != 0) { + TELEMETRY_LOG_ERR("Valid unregister test failed: %i", ret); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Valid unregister test passed"); + + ret = rte_telemetry_cleanup(); + if (ret < 0) { + TELEMETRY_LOG_ERR("Cleanup test failed"); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Valid cleanup test passed"); + + return 0; +} + +int32_t +rte_telemetry_socket_messaging_testing(int index, int socket) +{ + struct telemetry_impl *telemetry = calloc(1, sizeof(telemetry_impl)); + int fd, bad_send_fd, send_fd, bad_fd, bad_recv_fd, recv_fd, ret; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Could not initialize Telemetry API"); + return -1; + } + + telemetry->server_fd = socket; + telemetry->reg_index = index; + TELEMETRY_LOG_INFO("Beginning Telemetry socket message Selftest"); + rte_telemetry_socket_test_setup(telemetry, &send_fd, &recv_fd); + TELEMETRY_LOG_INFO("Register valid client test"); + + ret = rte_telemetry_socket_register_test(telemetry, &fd, send_fd, + recv_fd); + if (ret < 0) { + TELEMETRY_LOG_ERR("Register valid client test failed!"); + free(telemetry); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Register valid client test passed!"); + + TELEMETRY_LOG_INFO("Register invalid/same client test"); + ret = rte_telemetry_socket_test_setup(telemetry, &bad_send_fd, + &bad_recv_fd); + ret = rte_telemetry_socket_register_test(telemetry, &bad_fd, + bad_send_fd, bad_recv_fd); + if (!ret) { + TELEMETRY_LOG_ERR("Register invalid/same client test failed!"); + free(telemetry); + return -1; + } + + TELEMETRY_LOG_INFO("Success - Register invalid/same client test passed!"); + + ret = rte_telemetry_json_socket_message_test(telemetry, fd); + if (ret < 0) { + free(telemetry); + return -1; + } + + free(telemetry); + return 0; +} + +int32_t +rte_telemetry_socket_register_test(struct telemetry_impl *telemetry, int *fd, + int send_fd, int recv_fd) +{ + int ret; + char good_req_string[BUF_SIZE]; + + snprintf(good_req_string, sizeof(good_req_string), + "{\"action\":1,\"command\":\"clients\",\"data\":{\"client_path\"" + ":\"%s\"}}", SOCKET_TEST_CLIENT_PATH); + + listen(recv_fd, 1); + + ret = send(send_fd, good_req_string, strlen(good_req_string), 0); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send message over socket"); + return -1; + } + + rte_telemetry_run(telemetry); + + if (telemetry->register_fail_count != 0) + return -1; + + *fd = accept(recv_fd, NULL, NULL); + + return 0; +} + +int32_t +rte_telemetry_socket_test_setup(struct telemetry_impl *telemetry, int *send_fd, + int *recv_fd) +{ + int ret; + const char *client_path = SOCKET_TEST_CLIENT_PATH; + char socket_path[BUF_SIZE]; + struct sockaddr_un addr = {0}; + struct sockaddr_un addrs = {0}; 
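/*
 * Wiring assumed by the socket tests in this file: *send_fd connects to
 * the library's own server socket and acts as an external client writing
 * JSON requests, while *recv_fd is bound and listening at
 * SOCKET_TEST_CLIENT_PATH so that the library can connect back to it once
 * a "clients" registration request has been parsed.
 */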
+ *send_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + *recv_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + + listen(telemetry->server_fd, 5); + addr.sun_family = AF_UNIX; + rte_telemetry_get_runtime_dir(socket_path, sizeof(socket_path)); + strlcpy(addr.sun_path, socket_path, sizeof(addr.sun_path)); + + ret = connect(*send_fd, (struct sockaddr *) &addr, sizeof(addr)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not connect socket"); + return -1; + } + + telemetry->accept_fd = accept(telemetry->server_fd, NULL, NULL); + + addrs.sun_family = AF_UNIX; + strlcpy(addrs.sun_path, client_path, sizeof(addrs.sun_path)); + unlink(client_path); + + ret = bind(*recv_fd, (struct sockaddr *)&addrs, sizeof(addrs)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not bind socket"); + return -1; + } + + return 0; +} + +static int32_t +rte_telemetry_stat_parse(char *buf, struct json_data *json_data_struct) +{ + json_error_t error; + json_t *root = json_loads(buf, 0, &error); + int arraylen, i; + json_t *status, *dataArray, *port, *stats, *name, *value, *dataArrayObj, + *statsArrayObj; + + stats = NULL; + port = NULL; + name = NULL; + + if (buf == NULL) { + TELEMETRY_LOG_ERR("JSON message is NULL"); + return -EINVAL; + } + + if (root == NULL) { + TELEMETRY_LOG_ERR("Could not load JSON object from data passed in : %s", + error.text); + return -EPERM; + } else if (!json_is_object(root)) { + TELEMETRY_LOG_ERR("JSON Request is not a JSON object"); + json_decref(root); + return -EINVAL; + } + + status = json_object_get(root, "status_code"); + if (!status) { + TELEMETRY_LOG_ERR("Request does not have status field"); + return -EINVAL; + } else if (!json_is_string(status)) { + TELEMETRY_LOG_ERR("Status value is not a string"); + return -EINVAL; + } + + json_data_struct->status_code = strdup(json_string_value(status)); + + dataArray = json_object_get(root, "data"); + if (dataArray == NULL) { + TELEMETRY_LOG_ERR("Request does not have data field"); + return -EINVAL; + } + + arraylen = json_array_size(dataArray); + if (arraylen == 0) { + json_data_struct->data = "null"; + return -EINVAL; + } + + for (i = 0; i < arraylen; i++) { + dataArrayObj = json_array_get(dataArray, i); + port = json_object_get(dataArrayObj, "port"); + stats = json_object_get(dataArrayObj, "stats"); + } + + if (port == NULL) { + TELEMETRY_LOG_ERR("Request does not have port field"); + return -EINVAL; + } + + if (!json_is_integer(port)) { + TELEMETRY_LOG_ERR("Port value is not an integer"); + return -EINVAL; + } + + json_data_struct->port = json_integer_value(port); + + if (stats == NULL) { + TELEMETRY_LOG_ERR("Request does not have stats field"); + return -EINVAL; + } + + arraylen = json_array_size(stats); + for (i = 0; i < arraylen; i++) { + statsArrayObj = json_array_get(stats, i); + name = json_object_get(statsArrayObj, "name"); + value = json_object_get(statsArrayObj, "value"); + } + + if (name == NULL) { + TELEMETRY_LOG_ERR("Request does not have name field"); + return -EINVAL; + } + + if (!json_is_string(name)) { + TELEMETRY_LOG_ERR("Stat name value is not a string"); + return -EINVAL; + } + + json_data_struct->stat_name = strdup(json_string_value(name)); + + if (value == NULL) { + TELEMETRY_LOG_ERR("Request does not have value field"); + return -EINVAL; + } + + if (!json_is_integer(value)) { + TELEMETRY_LOG_ERR("Stat value is not an integer"); + return -EINVAL; + } + + json_data_struct->stat_value = json_integer_value(value); + + return 0; +} + +static void +rte_telemetry_free_test_data(struct json_data *data) +{ + free(data->status_code); + 
free(data->stat_name); + free(data); +} + +int32_t +rte_telemetry_valid_json_test(struct telemetry_impl *telemetry, int fd) +{ + int ret; + int port = 0; + int value = 0; + int fail_count = 0; + int buffer_read = 0; + char buf[BUF_SIZE]; + struct json_data *data_struct; + errno = 0; + const char *status = "Status OK: 200"; + const char *name = "rx_good_packets"; + const char *valid_json_message = "{\"action\":0,\"command\":" + "\"ports_stats_values_by_name\",\"data\":{\"ports\"" + ":[0],\"stats\":[\"rx_good_packets\"]}}"; + + ret = send(fd, valid_json_message, strlen(valid_json_message), 0); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send message over socket"); + return -1; + } + + rte_telemetry_run(telemetry); + buffer_read = recv(fd, buf, BUF_SIZE-1, 0); + + if (buffer_read == -1) { + TELEMETRY_LOG_ERR("Read error"); + return -1; + } + + buf[buffer_read] = '\0'; + data_struct = calloc(1, sizeof(struct json_data)); + ret = rte_telemetry_stat_parse(buf, data_struct); + + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not parse stats"); + fail_count++; + } + + if (strcmp(data_struct->status_code, status) != 0) { + TELEMETRY_LOG_ERR("Status code is invalid"); + fail_count++; + } + + if (data_struct->port != port) { + TELEMETRY_LOG_ERR("Port is invalid"); + fail_count++; + } + + if (strcmp(data_struct->stat_name, name) != 0) { + TELEMETRY_LOG_ERR("Stat name is invalid"); + fail_count++; + } + + if (data_struct->stat_value != value) { + TELEMETRY_LOG_ERR("Stat value is invalid"); + fail_count++; + } + + rte_telemetry_free_test_data(data_struct); + if (fail_count > 0) + return -1; + + TELEMETRY_LOG_INFO("Success - Passed valid JSON message test passed"); + + return 0; +} + +int32_t +rte_telemetry_invalid_json_test(struct telemetry_impl *telemetry, int fd) +{ + int ret; + char buf[BUF_SIZE]; + int fail_count = 0; + const char *invalid_json = "{]"; + const char *status = "Status Error: Unknown"; + const char *data = "null"; + struct json_data *data_struct; + int buffer_read = 0; + errno = 0; + + ret = send(fd, invalid_json, strlen(invalid_json), 0); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send message over socket"); + return -1; + } + + rte_telemetry_run(telemetry); + buffer_read = recv(fd, buf, BUF_SIZE-1, 0); + + if (buffer_read == -1) { + TELEMETRY_LOG_ERR("Read error"); + return -1; + } + + buf[buffer_read] = '\0'; + + data_struct = calloc(1, sizeof(struct json_data)); + ret = rte_telemetry_stat_parse(buf, data_struct); + + if (ret < 0) + TELEMETRY_LOG_ERR("Could not parse stats"); + + if (strcmp(data_struct->status_code, status) != 0) { + TELEMETRY_LOG_ERR("Status code is invalid"); + fail_count++; + } + + if (strcmp(data_struct->data, data) != 0) { + TELEMETRY_LOG_ERR("Data status is invalid"); + fail_count++; + } + + rte_telemetry_free_test_data(data_struct); + if (fail_count > 0) + return -1; + + TELEMETRY_LOG_INFO("Success - Passed invalid JSON message test"); + + return 0; +} + +int32_t +rte_telemetry_json_contents_test(struct telemetry_impl *telemetry, int fd) +{ + int ret; + char buf[BUF_SIZE]; + int fail_count = 0; + char *status = "Status Error: Invalid Argument 404"; + char *data = "null"; + struct json_data *data_struct; + const char *invalid_contents = "{\"action\":0,\"command\":" + "\"ports_stats_values_by_name\",\"data\":{\"ports\"" + ":[0],\"stats\":[\"some_invalid_param\"," + "\"another_invalid_param\"]}}"; + int buffer_read = 0; + errno = 0; + + ret = send(fd, invalid_contents, strlen(invalid_contents), 0); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send message 
over socket"); + return -1; + } + + rte_telemetry_run(telemetry); + buffer_read = recv(fd, buf, BUF_SIZE-1, 0); + + if (buffer_read == -1) { + TELEMETRY_LOG_ERR("Read error"); + return -1; + } + + buf[buffer_read] = '\0'; + data_struct = calloc(1, sizeof(struct json_data)); + ret = rte_telemetry_stat_parse(buf, data_struct); + + if (ret < 0) + TELEMETRY_LOG_ERR("Could not parse stats"); + + if (strcmp(data_struct->status_code, status) != 0) { + TELEMETRY_LOG_ERR("Status code is invalid"); + fail_count++; + } + + if (strcmp(data_struct->data, data) != 0) { + TELEMETRY_LOG_ERR("Data status is invalid"); + fail_count++; + } + + rte_telemetry_free_test_data(data_struct); + if (fail_count > 0) + return -1; + + TELEMETRY_LOG_INFO("Success - Passed invalid JSON content test"); + + return 0; +} + +int32_t +rte_telemetry_json_empty_test(struct telemetry_impl *telemetry, int fd) +{ + int ret; + char buf[BUF_SIZE]; + int fail_count = 0; + const char *status = "Status Error: Invalid Argument 404"; + char *data = "null"; + struct json_data *data_struct; + const char *empty_json = "{}"; + int buffer_read = 0; + errno = 0; + + ret = (send(fd, empty_json, strlen(empty_json), 0)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send message over socket"); + return -1; + } + + rte_telemetry_run(telemetry); + buffer_read = recv(fd, buf, BUF_SIZE-1, 0); + + if (buffer_read == -1) { + TELEMETRY_LOG_ERR("Read error"); + return -1; + } + + buf[buffer_read] = '\0'; + data_struct = calloc(1, sizeof(struct json_data)); + ret = rte_telemetry_stat_parse(buf, data_struct); + + if (ret < 0) + TELEMETRY_LOG_ERR("Could not parse stats"); + + if (strcmp(data_struct->status_code, status) != 0) { + TELEMETRY_LOG_ERR("Status code is invalid"); + fail_count++; + } + + if (strcmp(data_struct->data, data) != 0) { + TELEMETRY_LOG_ERR("Data status is invalid"); + fail_count++; + } + + rte_telemetry_free_test_data(data_struct); + + if (fail_count > 0) + return -1; + + TELEMETRY_LOG_INFO("Success - Passed JSON empty message test"); + + return 0; +} + +int32_t +rte_telemetry_json_socket_message_test(struct telemetry_impl *telemetry, int fd) +{ + uint16_t i; + int ret, fail_count; + + fail_count = 0; + struct telemetry_message_test socket_json_tests[] = { + {.test_name = "Invalid JSON test", + .test_func_ptr = rte_telemetry_invalid_json_test}, + {.test_name = "Valid JSON test", + .test_func_ptr = rte_telemetry_valid_json_test}, + {.test_name = "JSON contents test", + .test_func_ptr = rte_telemetry_json_contents_test}, + {.test_name = "JSON empty tests", + .test_func_ptr = rte_telemetry_json_empty_test} + }; + +#define NUM_TESTS RTE_DIM(socket_json_tests) + + for (i = 0; i < NUM_TESTS; i++) { + TELEMETRY_LOG_INFO("%s", socket_json_tests[i].test_name); + ret = (socket_json_tests[i].test_func_ptr) + (telemetry, fd); + if (ret < 0) { + TELEMETRY_LOG_ERR("%s failed", + socket_json_tests[i].test_name); + fail_count++; + } + } + + if (fail_count > 0) { + TELEMETRY_LOG_ERR("Failed %i JSON socket message test(s)", + fail_count); + return -1; + } + + TELEMETRY_LOG_INFO("Success - All JSON tests passed"); + + return 0; +} + +int telemetry_log_level; + +static struct rte_option option = { + .opt_str = "--telemetry", + .cb = &rte_telemetry_init, + .enabled = 0 +}; + +RTE_INIT(rte_telemetry_register) +{ + telemetry_log_level = rte_log_register("lib.telemetry"); + if (telemetry_log_level >= 0) + rte_log_set_level(telemetry_log_level, RTE_LOG_ERR); + + rte_option_register(&option); +} diff --git a/lib/librte_telemetry/rte_telemetry.h 
b/lib/librte_telemetry/rte_telemetry.h new file mode 100644 index 00000000..119db16f --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdint.h> + +#ifndef _RTE_TELEMETRY_H_ +#define _RTE_TELEMETRY_H_ + +/** + * @file + * RTE Telemetry + * + * The telemetry library provides a method to retrieve statistics from + * DPDK by sending a JSON encoded message over a socket. DPDK will send + * a JSON encoded response containing telemetry data. + ***/ + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Initialize Telemetry + * + * @return + * 0 on successful initialisation. + * @return + * -ENOMEM on memory allocation error + * @return + * -EPERM on unknown error failure + * @return + * -EALREADY if Telemetry is already initialised. + */ +int32_t __rte_experimental +rte_telemetry_init(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Clean up and free memory. + * + * @return + * 0 on success + * @return + * -EPERM on failure + */ +int32_t __rte_experimental +rte_telemetry_cleanup(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Runs various tests to ensure telemetry initialisation and register/unregister + * functions are working correctly. + * + * @return + * 0 on success when all tests have passed + * @return + * -1 on failure when the test has failed + */ +int32_t __rte_experimental +rte_telemetry_selftest(void); + +#endif diff --git a/lib/librte_telemetry/rte_telemetry_internal.h b/lib/librte_telemetry/rte_telemetry_internal.h new file mode 100644 index 00000000..de7afda3 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_internal.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <rte_log.h> +#include <rte_tailq.h> + +#ifndef _RTE_TELEMETRY_INTERNAL_H_ +#define _RTE_TELEMETRY_INTERNAL_H_ + +/* Logging Macros */ +extern int telemetry_log_level; + +#define TELEMETRY_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ##level, telemetry_log_level, "%s(): "fmt "\n", \ + __func__, ##args) + +#define TELEMETRY_LOG_ERR(fmt, args...) \ + TELEMETRY_LOG(ERR, fmt, ## args) + +#define TELEMETRY_LOG_WARN(fmt, args...) \ + TELEMETRY_LOG(WARNING, fmt, ## args) + +#define TELEMETRY_LOG_INFO(fmt, args...) \ + TELEMETRY_LOG(INFO, fmt, ## args) + +typedef struct telemetry_client { + char *file_path; + int fd; + TAILQ_ENTRY(telemetry_client) client_list; +} telemetry_client; + +typedef struct telemetry_impl { + int accept_fd; + int server_fd; + pthread_t thread_id; + int thread_status; + uint32_t socket_id; + int reg_index; + int metrics_register_done; + TAILQ_HEAD(, telemetry_client) client_list_head; + struct telemetry_client *request_client; + int register_fail_count; +} telemetry_impl; + +enum rte_telemetry_parser_actions { + ACTION_GET = 0, + ACTION_DELETE = 2 +}; + +int32_t +rte_telemetry_parse_client_message(struct telemetry_impl *telemetry, char *buf); + +int32_t +rte_telemetry_send_error_response(struct telemetry_impl *telemetry, + int error_type); + +int32_t +rte_telemetry_register_client(struct telemetry_impl *telemetry, + const char *client_path); + +int32_t +rte_telemetry_unregister_client(struct telemetry_impl *telemetry, + const char *client_path); + +/** + * This is a wrapper for the ethdev api rte_eth_find_next(). 
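 * It is used by the stats commands in rte_telemetry_parser.c to validate
 * the port ids supplied in a request before their stats are fetched.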
+ * If rte_eth_find_next() returns the same port id that we passed it, + * then we know that that port is active. + */ +int32_t +rte_telemetry_is_port_active(int port_id); + +int32_t +rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids, + uint32_t *port_ids, int num_port_ids, struct telemetry_impl *telemetry); + +int32_t +rte_telemetry_socket_messaging_testing(int index, int socket); + +#endif diff --git a/lib/librte_telemetry/rte_telemetry_parser.c b/lib/librte_telemetry/rte_telemetry_parser.c new file mode 100644 index 00000000..03a58a2f --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_parser.c @@ -0,0 +1,586 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <jansson.h> + +#include <rte_metrics.h> +#include <rte_common.h> +#include <rte_ethdev.h> + +#include "rte_telemetry_internal.h" + +typedef int (*command_func)(struct telemetry_impl *, int, json_t *); + +struct rte_telemetry_command { + char *text; + command_func fn; +} command; + +static int32_t +rte_telemetry_command_clients(struct telemetry_impl *telemetry, int action, + json_t *data) +{ + int ret; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (action != ACTION_DELETE) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + goto einval_fail; + } + + if (!json_is_object(data)) { + TELEMETRY_LOG_WARN("Invalid data provided for this command"); + goto einval_fail; + } + + json_t *client_path = json_object_get(data, "client_path"); + if (!json_is_string(client_path)) { + TELEMETRY_LOG_WARN("Command value is not a string"); + goto einval_fail; + } + + ret = rte_telemetry_unregister_client(telemetry, + json_string_value(client_path)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not unregister client"); + goto einval_fail; + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +static int32_t +rte_telemetry_command_ports(struct telemetry_impl *telemetry, int action, + json_t *data) +{ + int ret; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (!json_is_null(data)) { + TELEMETRY_LOG_WARN("Data should be NULL JSON object for 'ports' command"); + goto einval_fail; + } + + if (action != ACTION_GET) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + goto einval_fail; + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +static int32_t +rte_telemetry_command_ports_details(struct telemetry_impl *telemetry, + int action, json_t *data) +{ + json_t *value, *port_ids_json = json_object_get(data, "ports"); + uint64_t num_port_ids = json_array_size(port_ids_json); + int ret, port_ids[num_port_ids]; + RTE_SET_USED(port_ids); + size_t index; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (action != ACTION_GET) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + goto einval_fail; + } + + if (!json_is_object(data)) { + TELEMETRY_LOG_WARN("Invalid data provided for this command"); + goto einval_fail; + } + + if (!json_is_array(port_ids_json)) { + TELEMETRY_LOG_WARN("Invalid Port ID array"); + goto einval_fail; + } + + json_array_foreach(port_ids_json, index, 
value) { + if (!json_is_integer(value)) { + TELEMETRY_LOG_WARN("Port ID given is invalid"); + goto einval_fail; + } + port_ids[index] = json_integer_value(value); + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +static int32_t +rte_telemetry_command_port_stats(struct telemetry_impl *telemetry, int action, + json_t *data) +{ + int ret; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (!json_is_null(data)) { + TELEMETRY_LOG_WARN("Data should be NULL JSON object for 'port_stats' command"); + goto einval_fail; + } + + if (action != ACTION_GET) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + goto einval_fail; + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +static int32_t +rte_telemetry_stat_names_to_ids(struct telemetry_impl *telemetry, + const char * const *stat_names, uint32_t *stat_ids, + uint64_t num_stat_names) +{ + struct rte_metric_name *names; + int ret, num_metrics; + uint32_t i, k; + + if (stat_names == NULL) { + TELEMETRY_LOG_WARN("Invalid stat_names argument"); + goto einval_fail; + } + + if (num_stat_names <= 0) { + TELEMETRY_LOG_WARN("Invalid num_stat_names argument"); + goto einval_fail; + } + + num_metrics = rte_metrics_get_names(NULL, 0); + if (num_metrics < 0) { + TELEMETRY_LOG_ERR("Cannot get metrics count"); + goto eperm_fail; + } else if (num_metrics == 0) { + TELEMETRY_LOG_WARN("No metrics have been registered"); + goto eperm_fail; + } + + names = malloc(sizeof(struct rte_metric_name) * num_metrics); + if (names == NULL) { + TELEMETRY_LOG_ERR("Cannot allocate memory for names"); + + ret = rte_telemetry_send_error_response(telemetry, -ENOMEM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + return -1; + } + + ret = rte_metrics_get_names(names, num_metrics); + if (ret < 0 || ret > num_metrics) { + TELEMETRY_LOG_ERR("Cannot get metrics names"); + free(names); + goto eperm_fail; + } + + k = 0; + for (i = 0; i < (uint32_t)num_stat_names; i++) { + uint32_t j; + for (j = 0; j < (uint32_t)num_metrics; j++) { + if (strcmp(stat_names[i], names[j].name) == 0) { + stat_ids[k] = j; + k++; + break; + } + } + } + + if (k != num_stat_names) { + TELEMETRY_LOG_WARN("Invalid stat names provided"); + free(names); + goto einval_fail; + } + + free(names); + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + +eperm_fail: + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; +} + +int32_t +rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry, + int action, json_t *data) +{ + int ret, num_metrics, i, p; + struct rte_metric_name *names; + uint64_t num_port_ids = 0; + uint32_t port_ids[RTE_MAX_ETHPORTS]; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (action != ACTION_GET) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + if (json_is_object(data)) { + TELEMETRY_LOG_WARN("Invalid data provided for this command"); + ret = 
rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + num_metrics = rte_metrics_get_names(NULL, 0); + if (num_metrics < 0) { + TELEMETRY_LOG_ERR("Cannot get metrics count"); + + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + return -1; + } else if (num_metrics == 0) { + TELEMETRY_LOG_ERR("No metrics to display (none have been registered)"); + + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + return -1; + } + + names = malloc(sizeof(struct rte_metric_name) * num_metrics); + if (names == NULL) { + TELEMETRY_LOG_ERR("Cannot allocate memory"); + ret = rte_telemetry_send_error_response(telemetry, + -ENOMEM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + const char *stat_names[num_metrics]; + uint32_t stat_ids[num_metrics]; + + RTE_ETH_FOREACH_DEV(p) { + port_ids[num_port_ids] = p; + num_port_ids++; + } + + if (!num_port_ids) { + TELEMETRY_LOG_WARN("No active ports"); + + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + goto fail; + } + + ret = rte_metrics_get_names(names, num_metrics); + for (i = 0; i < num_metrics; i++) + stat_names[i] = names[i].name; + + ret = rte_telemetry_stat_names_to_ids(telemetry, stat_names, stat_ids, + num_metrics); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not convert stat names to IDs"); + goto fail; + } + + ret = rte_telemetry_send_ports_stats_values(stat_ids, num_metrics, + port_ids, num_port_ids, telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Sending ports stats values failed"); + goto fail; + } + + return 0; + +fail: + free(names); + return -1; +} + +int32_t +rte_telemetry_command_ports_stats_values_by_name(struct telemetry_impl + *telemetry, int action, json_t *data) +{ + int ret; + json_t *port_ids_json = json_object_get(data, "ports"); + json_t *stat_names_json = json_object_get(data, "stats"); + uint64_t num_port_ids = json_array_size(port_ids_json); + uint64_t num_stat_names = json_array_size(stat_names_json); + const char *stat_names[num_stat_names]; + uint32_t port_ids[num_port_ids], stat_ids[num_stat_names]; + size_t index; + json_t *value; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + if (action != ACTION_GET) { + TELEMETRY_LOG_WARN("Invalid action for this command"); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + if (!json_is_object(data)) { + TELEMETRY_LOG_WARN("Invalid data provided for this command"); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + if (!json_is_array(port_ids_json) || + !json_is_array(stat_names_json)) { + TELEMETRY_LOG_WARN("Invalid input data array(s)"); + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + + json_array_foreach(port_ids_json, index, value) { + if (!json_is_integer(value)) { + TELEMETRY_LOG_WARN("Port ID given is not valid"); + ret = rte_telemetry_send_error_response(telemetry, + -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + port_ids[index] = json_integer_value(value); + ret = 
rte_telemetry_is_port_active(port_ids[index]); + if (ret < 1) { + ret = rte_telemetry_send_error_response(telemetry, + -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -1; + } + } + + json_array_foreach(stat_names_json, index, value) { + if (!json_is_string(value)) { + TELEMETRY_LOG_WARN("Stat Name given is not a string"); + + ret = rte_telemetry_send_error_response(telemetry, + -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + return -1; + } + stat_names[index] = json_string_value(value); + } + + ret = rte_telemetry_stat_names_to_ids(telemetry, stat_names, stat_ids, + num_stat_names); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not convert stat names to IDs"); + return -1; + } + + ret = rte_telemetry_send_ports_stats_values(stat_ids, num_stat_names, + port_ids, num_port_ids, telemetry); + if (ret < 0) { + TELEMETRY_LOG_ERR("Sending ports stats values failed"); + return -1; + } + + return 0; +} + +static int32_t +rte_telemetry_parse_command(struct telemetry_impl *telemetry, int action, + const char *command, json_t *data) +{ + int ret; + uint32_t i; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + struct rte_telemetry_command commands[] = { + { + .text = "clients", + .fn = &rte_telemetry_command_clients + }, + { + .text = "ports", + .fn = &rte_telemetry_command_ports + }, + { + .text = "ports_details", + .fn = &rte_telemetry_command_ports_details + }, + { + .text = "port_stats", + .fn = &rte_telemetry_command_port_stats + }, + { + .text = "ports_stats_values_by_name", + .fn = &rte_telemetry_command_ports_stats_values_by_name + }, + { + .text = "ports_all_stat_values", + .fn = &rte_telemetry_command_ports_all_stat_values + } + }; + + const uint32_t num_commands = RTE_DIM(commands); + + for (i = 0; i < num_commands; i++) { + if (strcmp(command, commands[i].text) == 0) { + ret = commands[i].fn(telemetry, action, data); + if (ret < 0) { + TELEMETRY_LOG_ERR("Command Function for %s failed", + commands[i].text); + return -1; + } + return 0; + } + } + + TELEMETRY_LOG_WARN("\"%s\" command not found", command); + + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + + return -1; +} + +int32_t __rte_experimental +rte_telemetry_parse(struct telemetry_impl *telemetry, char *socket_rx_data) +{ + int ret, action_int; + json_error_t error; + json_t *root, *action, *command, *data; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; + } + + root = json_loads(socket_rx_data, 0, &error); + if (root == NULL) { + TELEMETRY_LOG_WARN("Could not load JSON object from data passed in : %s", + error.text); + ret = rte_telemetry_send_error_response(telemetry, -EPERM); + if (ret < 0) + TELEMETRY_LOG_ERR("Could not send error"); + return -EPERM; + } else if (!json_is_object(root)) { + TELEMETRY_LOG_WARN("JSON Request is not a JSON object"); + json_decref(root); + goto einval_fail; + } + + action = json_object_get(root, "action"); + if (action == NULL) { + TELEMETRY_LOG_WARN("Request does not have action field"); + goto einval_fail; + } else if (!json_is_integer(action)) { + TELEMETRY_LOG_WARN("Action value is not an integer"); + goto einval_fail; + } + + command = json_object_get(root, "command"); + if (command == NULL) { + TELEMETRY_LOG_WARN("Request does not have command field"); + goto einval_fail; + } else if (!json_is_string(command)) { + TELEMETRY_LOG_WARN("Command value is not a string"); + goto 
einval_fail; + } + + action_int = json_integer_value(action); + if (action_int != ACTION_GET && action_int != ACTION_DELETE) { + TELEMETRY_LOG_WARN("Invalid action code"); + goto einval_fail; + } + + const char *command_string = json_string_value(command); + data = json_object_get(root, "data"); + if (data == NULL) { + TELEMETRY_LOG_WARN("Request does not have data field"); + goto einval_fail; + } + + ret = rte_telemetry_parse_command(telemetry, action_int, command_string, + data); + if (ret < 0) { + TELEMETRY_LOG_WARN("Could not parse command"); + return -EINVAL; + } + + return 0; + +einval_fail: + ret = rte_telemetry_send_error_response(telemetry, -EINVAL); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not send error"); + return -EPERM; + } + return -EINVAL; +} diff --git a/lib/librte_telemetry/rte_telemetry_parser.h b/lib/librte_telemetry/rte_telemetry_parser.h new file mode 100644 index 00000000..b7051945 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_parser.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include "rte_telemetry_internal.h" +#include "rte_compat.h" + +#ifndef _RTE_TELEMETRY_PARSER_H_ +#define _RTE_TELEMETRY_PARSER_H_ + +int32_t __rte_experimental +rte_telemetry_parse(struct telemetry_impl *telemetry, char *socket_rx_data); + +#endif diff --git a/lib/librte_telemetry/rte_telemetry_parser_test.c b/lib/librte_telemetry/rte_telemetry_parser_test.c new file mode 100644 index 00000000..5fe93fa6 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_parser_test.c @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <errno.h> +#include <jansson.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_tailq.h> +#include <rte_string_fns.h> + +#include "rte_telemetry_parser.h" + +enum choices { + INV_ACTION_VAL, + INV_COMMAND_VAL, + INV_DATA_VAL, + INV_ACTION_FIELD, + INV_COMMAND_FIELD, + INV_DATA_FIELD, + INV_JSON_FORMAT, + VALID_REQ +}; + + +#define TEST_CLIENT "/var/run/dpdk/test_client" + +int32_t +rte_telemetry_create_test_socket(struct telemetry_impl *telemetry, + const char *test_client_path) +{ + int ret, sockfd; + struct sockaddr_un addr = {0}; + struct telemetry_client *client; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (sockfd < 0) { + TELEMETRY_LOG_ERR("Test socket creation failure"); + return -1; + } + + addr.sun_family = AF_UNIX; + strlcpy(addr.sun_path, test_client_path, sizeof(addr.sun_path)); + unlink(test_client_path); + + if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + TELEMETRY_LOG_ERR("Test socket binding failure"); + return -1; + } + + if (listen(sockfd, 1) < 0) { + TELEMETRY_LOG_ERR("Listen failure"); + return -1; + } + + ret = rte_telemetry_register_client(telemetry, test_client_path); + if (ret < 0) { + TELEMETRY_LOG_ERR("Register dummy client failed: %i", ret); + return -1; + } + + ret = accept(sockfd, NULL, NULL); + if (ret < 0) { + TELEMETRY_LOG_ERR("Socket accept failed"); + return -1; + } + + TAILQ_FOREACH(client, &telemetry->client_list_head, client_list) + telemetry->request_client = client; + + return 0; +} + +int32_t +rte_telemetry_format_port_stat_ids(int *port_ids, int num_port_ids, + const char * const *stat_names, int num_stat_names, json_t **data) +{ + 
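/*
 * Builds the "data" member used by rte_telemetry_create_json_request()
 * below: a JSON object carrying a "ports" integer array and, when stat
 * names are supplied, a "stats" string array. For the arrays used in
 * rte_telemetry_parser_test() the result is an object of the form:
 *
 *   {"ports":[0,1],"stats":["tx_good_packets","rx_good_packets"]}
 */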
+ int ret; + json_t *stat_names_json_array = NULL; + json_t *port_ids_json_array = NULL; + uint32_t i; + + if (num_port_ids < 0) { + TELEMETRY_LOG_ERR("Port Ids Count invalid"); + goto fail; + } + + *data = json_object(); + if (*data == NULL) { + TELEMETRY_LOG_ERR("Data json object creation failed"); + goto fail; + } + + port_ids_json_array = json_array(); + if (port_ids_json_array == NULL) { + TELEMETRY_LOG_ERR("port_ids_json_array creation failed"); + goto fail; + } + + for (i = 0; i < (uint32_t)num_port_ids; i++) { + ret = json_array_append(port_ids_json_array, + json_integer(port_ids[i])); + if (ret < 0) { + TELEMETRY_LOG_ERR("JSON array creation failed"); + goto fail; + } + } + + ret = json_object_set_new(*data, "ports", port_ids_json_array); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting 'ports' value in data object failed"); + goto fail; + } + + if (stat_names) { + if (num_stat_names < 0) { + TELEMETRY_LOG_ERR("Stat Names Count invalid"); + goto fail; + } + + stat_names_json_array = json_array(); + if (stat_names_json_array == NULL) { + TELEMETRY_LOG_ERR("stat_names_json_array creation failed"); + goto fail; + } + + uint32_t i; + for (i = 0; i < (uint32_t)num_stat_names; i++) { + ret = json_array_append(stat_names_json_array, + json_string(stat_names[i])); + if (ret < 0) { + TELEMETRY_LOG_ERR("JSON array creation failed"); + goto fail; + } + } + + ret = json_object_set_new(*data, "stats", stat_names_json_array); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting 'stats' value in data object failed"); + goto fail; + } + } + + return 0; + +fail: + if (*data) + json_decref(*data); + if (stat_names_json_array) + json_decref(stat_names_json_array); + if (port_ids_json_array) + json_decref(port_ids_json_array); + return -1; +} + +int32_t +rte_telemetry_create_json_request(int action, char *command, + const char *client_path, int *port_ids, int num_port_ids, + const char * const *stat_names, int num_stat_names, char **request, + int inv_choice) +{ + int ret; + json_t *root = json_object(); + json_t *data; + + if (root == NULL) { + TELEMETRY_LOG_ERR("Could not create root json object"); + goto fail; + } + + if (inv_choice == INV_ACTION_FIELD) { + ret = json_object_set_new(root, "ac--on", json_integer(action)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting invalid action field in root object failed"); + goto fail; + } + } else { + ret = json_object_set_new(root, "action", json_integer(action)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting valid action field in root object failed"); + goto fail; + } + } + + if (inv_choice == INV_COMMAND_FIELD) { + ret = json_object_set_new(root, "co---nd", json_string(command)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting invalid command field in root object failed"); + goto fail; + } + } else { + ret = json_object_set_new(root, "command", json_string(command)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting valid command field in root object failed"); + goto fail; + } + } + + data = json_null(); + if (client_path) { + data = json_object(); + if (data == NULL) { + TELEMETRY_LOG_ERR("Data json object creation failed"); + goto fail; + } + + ret = json_object_set_new(data, "client_path", + json_string(client_path)); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting valid client_path field in data object failed"); + goto fail; + } + + } else if (port_ids) { + ret = rte_telemetry_format_port_stat_ids(port_ids, num_port_ids, + stat_names, num_stat_names, &data); + if (ret < 0) { + TELEMETRY_LOG_ERR("Formatting Port/Stat arrays failed"); + goto fail; + } + + } + + if 
(inv_choice == INV_DATA_FIELD) { + ret = json_object_set_new(root, "d--a", data); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting invalid data field in data object failed"); + goto fail; + } + } else { + ret = json_object_set_new(root, "data", data); + if (ret < 0) { + TELEMETRY_LOG_ERR("Setting valid data field in data object failed"); + goto fail; + } + } + + *request = json_dumps(root, 0); + if (*request == NULL) { + TELEMETRY_LOG_ERR("Converting JSON root object to char* failed"); + goto fail; + } + + json_decref(root); + return 0; + +fail: + if (root) + json_decref(root); + return -1; +} + +int32_t +rte_telemetry_send_get_ports_and_stats_request(struct telemetry_impl *telemetry, + int action_choice, char *command_choice, int inv_choice) +{ + int ret; + char *request; + char *client_path_data = NULL; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + + if (inv_choice == INV_ACTION_VAL) + action_choice = -1; + else if (inv_choice == INV_COMMAND_VAL) + command_choice = "INVALID_COMMAND"; + else if (inv_choice == INV_DATA_VAL) + client_path_data = "INVALID_DATA"; + + ret = rte_telemetry_create_json_request(action_choice, command_choice, + client_path_data, NULL, -1, NULL, -1, &request, inv_choice); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not create JSON Request"); + return -1; + } + + if (inv_choice == INV_JSON_FORMAT) + request++; + + ret = rte_telemetry_parse(telemetry, request); + if (ret < 0) { + TELEMETRY_LOG_WARN("Could not parse JSON Request"); + return -1; + } + + return 0; +} + +int32_t +rte_telemetry_send_get_ports_details_request(struct telemetry_impl *telemetry, + int action_choice, int *port_ids, int num_port_ids, int inv_choice) +{ + int ret; + char *request; + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + char *command = "ports_details"; + + if (inv_choice == INV_ACTION_VAL) + action_choice = -1; + else if (inv_choice == INV_COMMAND_VAL) + command = "INVALID_COMMAND"; + else if (inv_choice == INV_DATA_VAL) + port_ids = NULL; + + + ret = rte_telemetry_create_json_request(action_choice, command, NULL, + port_ids, num_port_ids, NULL, -1, &request, inv_choice); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not create JSON Request"); + return -1; + } + + if (inv_choice == INV_JSON_FORMAT) + request++; + + ret = rte_telemetry_parse(telemetry, request); + if (ret < 0) { + TELEMETRY_LOG_WARN("Could not parse JSON Request"); + return -1; + } + + return 0; +} + +int32_t +rte_telemetry_send_stats_values_by_name_request(struct telemetry_impl + *telemetry, int action_choice, int *port_ids, int num_port_ids, + const char * const *stat_names, int num_stat_names, + int inv_choice) +{ + int ret; + char *request; + char *command = "ports_stats_values_by_name"; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + if (inv_choice == INV_ACTION_VAL) + action_choice = -1; + else if (inv_choice == INV_COMMAND_VAL) + command = "INVALID_COMMAND"; + else if (inv_choice == INV_DATA_VAL) { + port_ids = NULL; + stat_names = NULL; + } + + ret = rte_telemetry_create_json_request(action_choice, command, NULL, + port_ids, num_port_ids, stat_names, num_stat_names, &request, + inv_choice); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not create JSON Request"); + return -1; + } + + if (inv_choice == INV_JSON_FORMAT) + request++; + + ret = rte_telemetry_parse(telemetry, request); + if (ret < 0) { + 
TELEMETRY_LOG_WARN("Could not parse JSON Request"); + return -1; + } + + return 0; +} + +int32_t +rte_telemetry_send_unreg_request(struct telemetry_impl *telemetry, + int action_choice, const char *client_path, int inv_choice) +{ + int ret; + char *request; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + char *command = "clients"; + + if (inv_choice == INV_ACTION_VAL) + action_choice = -1; + else if (inv_choice == INV_COMMAND_VAL) + command = "INVALID_COMMAND"; + else if (inv_choice == INV_DATA_VAL) + client_path = NULL; + + ret = rte_telemetry_create_json_request(action_choice, command, + client_path, NULL, -1, NULL, -1, &request, inv_choice); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not create JSON Request"); + return -1; + } + + if (inv_choice == INV_JSON_FORMAT) + request++; + + ret = rte_telemetry_parse(telemetry, request); + if (ret < 0) { + TELEMETRY_LOG_WARN("Could not parse JSON Request"); + return -1; + } + + return 0; +} + +int32_t +rte_telemetry_parser_test(struct telemetry_impl *telemetry) +{ + int ret; + const char *client_path = TEST_CLIENT; + + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Telemetry argument has not been initialised"); + return -EINVAL; + } + + ret = rte_telemetry_create_test_socket(telemetry, client_path); + if (ret < 0) { + TELEMETRY_LOG_ERR("Could not create test request client socket"); + return -1; + } + + int port_ids[] = {0, 1}; + int num_port_ids = RTE_DIM(port_ids); + + static const char * const stat_names[] = {"tx_good_packets", + "rx_good_packets"}; + int num_stat_names = RTE_DIM(stat_names); + + static const char * const test_types[] = { + "INVALID ACTION VALUE TESTS", + "INVALID COMMAND VALUE TESTS", + "INVALID DATA VALUE TESTS", + "INVALID ACTION FIELD TESTS", + "INVALID COMMAND FIELD TESTS", + "INVALID DATA FIELD TESTS", + "INVALID JSON FORMAT TESTS", + "VALID TESTS" + }; + + +#define NUM_TEST_TYPES (sizeof(test_types)/sizeof(const char * const)) + + uint32_t i; + for (i = 0; i < NUM_TEST_TYPES; i++) { + TELEMETRY_LOG_INFO("%s", test_types[i]); + + ret = rte_telemetry_send_get_ports_and_stats_request(telemetry, + ACTION_GET, "ports", i); + if (ret != 0 && i == VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports valid test failed"); + return -EPERM; + } else if (ret != -1 && i != VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports invalid test failed"); + return -EPERM; + } + + TELEMETRY_LOG_INFO("Success - Get ports test passed"); + + ret = rte_telemetry_send_get_ports_details_request(telemetry, + ACTION_GET, port_ids, num_port_ids, i); + if (ret != 0 && i == VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports details valid"); + return -EPERM; + } else if (ret != -1 && i != VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports details invalid"); + return -EPERM; + } + + TELEMETRY_LOG_INFO("Success - Get ports details test passed"); + + ret = rte_telemetry_send_get_ports_and_stats_request(telemetry, + ACTION_GET, "port_stats", i); + if (ret != 0 && i == VALID_REQ) { + TELEMETRY_LOG_ERR("Get port stats valid test"); + return -EPERM; + } else if (ret != -1 && i != VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports stats invalid test failed"); + return -EPERM; + } + + TELEMETRY_LOG_INFO("Success - Get ports stats test passed"); + + ret = rte_telemetry_send_stats_values_by_name_request(telemetry, + ACTION_GET, port_ids, num_port_ids, stat_names, + num_stat_names, i); + if (ret != 0 && i == VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports stats values by name valid test failed"); + return -EPERM; + } else if (ret != 
-1 && i != VALID_REQ) { + TELEMETRY_LOG_ERR("Get ports stats values by name invalid test failed"); + return -EPERM; + } + + TELEMETRY_LOG_INFO("Success - Get ports stats values by name test passed"); + + ret = rte_telemetry_send_unreg_request(telemetry, ACTION_DELETE, + client_path, i); + if (ret != 0 && i == VALID_REQ) { + TELEMETRY_LOG_ERR("Deregister valid test failed"); + return -EPERM; + } else if (ret != -1 && i != VALID_REQ) { + TELEMETRY_LOG_ERR("Deregister invalid test failed"); + return -EPERM; + } + + TELEMETRY_LOG_INFO("Success - Deregister test passed"); + } + + return 0; +} diff --git a/lib/librte_telemetry/rte_telemetry_parser_test.h b/lib/librte_telemetry/rte_telemetry_parser_test.h new file mode 100644 index 00000000..6ada8527 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_parser_test.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _RTE_TELEMETRY_PARSER_TEST_H_ +#define _RTE_TELEMETRY_PARSER_TEST_H_ + +int32_t +rte_telemetry_parser_test(struct telemetry_impl *telemetry); + +int32_t +rte_telemetry_format_port_stat_ids(int *port_ids, int num_port_ids, + const char * const stat_names, int num_stat_names, json_t **data); + +int32_t +rte_telemetry_create_json_request(int action, char *command, + const char *client_path, int *port_ids, int num_port_ids, + const char * const stat_names, int num_stat_names, char **request, + int inv_choice); + +int32_t +rte_telemetry_send_get_ports_and_stats_request(struct telemetry_impl *telemetry, + int action_choice, char *command_choice, int inv_choice); + +int32_t +rte_telemetry_send_get_ports_details_request(struct telemetry_impl *telemetry, + int action_choice, int *port_ids, int num_port_ids, int inv_choice); + +int32_t +rte_telemetry_send_stats_values_by_name_request(struct telemetry_impl + *telemetry, int action_choice, int *port_ids, int num_port_ids, + const char * const stat_names, int num_stat_names, + int inv_choice); + +int32_t +rte_telemetry_send_unreg_request(int action_choice, const char *client_path, + int inv_choice); + +#endif diff --git a/lib/librte_telemetry/rte_telemetry_socket_tests.h b/lib/librte_telemetry/rte_telemetry_socket_tests.h new file mode 100644 index 00000000..db9167c5 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_socket_tests.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <stdbool.h> + +#include "rte_telemetry_internal.h" + +#ifndef _RTE_TELEMETRY_SOCKET_TESTING_H_ +#define _RTE_TELEMETRY_SOCKET_TESTING_H_ + +int32_t +rte_telemetry_json_socket_message_test(struct telemetry_impl *telemetry, + int fd); + +int32_t +rte_telemetry_invalid_json_test(struct telemetry_impl *telemetry, int fd); + +int32_t +rte_telemetry_valid_json_test(struct telemetry_impl *telemetry, int fd); + +int32_t +rte_telemetry_json_contents_test(struct telemetry_impl *telemetry, int fd); + +int32_t +rte_telemetry_json_empty_test(struct telemetry_impl *telemetry, int fd); + +int32_t +rte_telemetry_socket_register_test(struct telemetry_impl *telemetry, int *fd, + int send_fd, int recv_fd); + +int32_t +rte_telemetry_socket_test_setup(struct telemetry_impl *telemetry, int *send_fd, + int *recv_fd); + +#endif diff --git a/lib/librte_telemetry/rte_telemetry_version.map b/lib/librte_telemetry/rte_telemetry_version.map new file mode 100644 index 00000000..fa62d771 --- /dev/null +++ b/lib/librte_telemetry/rte_telemetry_version.map @@ -0,0 +1,10 @@ +EXPERIMENTAL { + global: + + 
rte_telemetry_cleanup; + rte_telemetry_init; + rte_telemetry_parse; + rte_telemetry_selftest; + + local: *; +}; diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile index de431fbb..5dd31898 100644 --- a/lib/librte_vhost/Makefile +++ b/lib/librte_vhost/Makefile @@ -13,13 +13,13 @@ LIBABIVER := 4 CFLAGS += -DALLOW_EXPERIMENTAL_API CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 CFLAGS += -I vhost_user +CFLAGS += -fno-strict-aliasing LDLIBS += -lpthread ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) LDLIBS += -lnuma endif -LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \ - -lrte_cryptodev -lrte_hash +LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net # all source are stored in SRCS-y SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \ @@ -30,6 +30,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h # only compile vhost crypto when cryptodev is enabled ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y) +LDLIBS += -lrte_cryptodev -lrte_hash SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h endif diff --git a/lib/librte_vhost/meson.build b/lib/librte_vhost/meson.build index bd62e0e3..e33e6fc1 100644 --- a/lib/librte_vhost/meson.build +++ b/lib/librte_vhost/meson.build @@ -7,8 +7,11 @@ endif if has_libnuma == 1 dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true) endif +dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', + cc.has_header('linux/userfaultfd.h')) version = 4 allow_experimental_apis = true +cflags += '-fno-strict-aliasing' sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c', 'vhost.c', 'vhost_user.c', 'virtio_net.c', 'vhost_crypto.c') diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h index 90465ca2..a418da47 100644 --- a/lib/librte_vhost/rte_vdpa.h +++ b/lib/librte_vhost/rte_vdpa.h @@ -21,67 +21,138 @@ enum vdpa_addr_type { VDPA_ADDR_MAX }; +/** + * vdpa device address + */ struct rte_vdpa_dev_addr { + /** vdpa address type */ enum vdpa_addr_type type; + + /** vdpa pci address */ union { uint8_t __dummy[64]; struct rte_pci_addr pci_addr; }; }; +/** + * vdpa device operations + */ struct rte_vdpa_dev_ops { - /* Get capabilities of this device */ + /** Get capabilities of this device */ int (*get_queue_num)(int did, uint32_t *queue_num); + + /** Get supported features of this device */ int (*get_features)(int did, uint64_t *features); + + /** Get supported protocol features of this device */ int (*get_protocol_features)(int did, uint64_t *protocol_features); - /* Driver configure/close the device */ + /** Driver configure/close the device */ int (*dev_conf)(int vid); int (*dev_close)(int vid); - /* Enable/disable this vring */ + /** Enable/disable this vring */ int (*set_vring_state)(int vid, int vring, int state); - /* Set features when changed */ + /** Set features when changed */ int (*set_features)(int vid); - /* Destination operations when migration done */ + /** Destination operations when migration done */ int (*migration_done)(int vid); - /* Get the vfio group fd */ + /** Get the vfio group fd */ int (*get_vfio_group_fd)(int vid); - /* Get the vfio device fd */ + /** Get the vfio device fd */ int (*get_vfio_device_fd)(int vid); - /* Get the notify area info of the queue */ + /** Get the notify area info of the queue */ int (*get_notify_area)(int vid, int qid, uint64_t *offset, uint64_t *size); - /* Reserved for future extension */ + /** Reserved for future extension */ void *reserved[5]; }; +/** + * vdpa 
device structure includes device address and device operations. + */ struct rte_vdpa_device { + /** vdpa device address */ struct rte_vdpa_dev_addr addr; + /** vdpa device operations */ struct rte_vdpa_dev_ops *ops; } __rte_cache_aligned; -/* Register a vdpa device, return did if successful, -1 on failure */ +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Register a vdpa device + * + * @param addr + * the vdpa device address + * @param ops + * the vdpa device operations + * @return + * device id on success, -1 on failure + */ int __rte_experimental rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, struct rte_vdpa_dev_ops *ops); -/* Unregister a vdpa device, return -1 on failure */ +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Unregister a vdpa device + * + * @param did + * vdpa device id + * @return + * device id on success, -1 on failure + */ int __rte_experimental rte_vdpa_unregister_device(int did); -/* Find did of a vdpa device, return -1 on failure */ +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Find the device id of a vdpa device + * + * @param addr + * the vdpa device address + * @return + * device id on success, -1 on failure + */ int __rte_experimental rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr); -/* Find a vdpa device based on did */ +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Find a vdpa device based on device id + * + * @param did + * device id + * @return + * rte_vdpa_device on success, NULL on failure + */ struct rte_vdpa_device * __rte_experimental rte_vdpa_get_device(int did); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get current available vdpa device number + * + * @return + * available vdpa device number + */ +int __rte_experimental +rte_vdpa_get_device_num(void); #endif /* _RTE_VDPA_H_ */ diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index b02673d4..d280ac42 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -28,6 +28,7 @@ extern "C" { #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2) #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3) +#define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4) /** Protocol features. */ #ifndef VHOST_USER_PROTOCOL_F_MQ @@ -58,6 +59,10 @@ extern "C" { #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 #endif +#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT +#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 +#endif + #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 #endif diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index da220dd0..ae39b6e2 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -67,6 +67,7 @@ EXPERIMENTAL { rte_vdpa_unregister_device; rte_vdpa_find_device_id; rte_vdpa_get_device; + rte_vdpa_get_device_num; rte_vhost_driver_attach_vdpa_device; rte_vhost_driver_detach_vdpa_device; rte_vhost_driver_get_vdpa_device_id; diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index d6303174..01b60ff9 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -51,6 +51,8 @@ struct vhost_user_socket { uint64_t supported_features; uint64_t features; + uint64_t protocol_features; + /* * Device id to identify a specific backend device. 
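
The doxygen comments added to rte_vdpa.h above document the vDPA registration API. Below is a minimal, hypothetical sketch of a driver using those calls; it is not part of this patch, and the VDPA_ADDR_PCI enumerator (not visible in this hunk), the example_* names and the single-queue value are assumptions for illustration only.

/* Hypothetical sketch of the vDPA registration flow; not part of this patch. */
#include <stdio.h>
#include <stdint.h>
#include <rte_pci.h>
#include <rte_vdpa.h>

static int example_get_queue_num(int did, uint32_t *queue_num)
{
	(void)did;
	*queue_num = 1;	/* pretend the device exposes a single queue pair */
	return 0;
}

static struct rte_vdpa_dev_ops example_ops = {
	.get_queue_num = example_get_queue_num,
	/* .get_features, .dev_conf, ... would be filled in by a real driver */
};

static int
example_register(struct rte_pci_addr *pci)
{
	struct rte_vdpa_dev_addr addr = {
		.type = VDPA_ADDR_PCI,	/* assumed enumerator */
		.pci_addr = *pci,
	};
	int did;

	did = rte_vdpa_register_device(&addr, &example_ops);
	if (did < 0)
		return -1;

	/* The device can now be looked up again by address or by id. */
	if (rte_vdpa_find_device_id(&addr) != did ||
			rte_vdpa_get_device(did) == NULL)
		return -1;

	printf("vdpa devices registered: %d\n", rte_vdpa_get_device_num());
	return did;
}
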
* It's set to -1 for the default software implementation. @@ -94,18 +96,23 @@ static struct vhost_user vhost_user = { .mutex = PTHREAD_MUTEX_INITIALIZER, }; -/* return bytes# of read on success or negative val on failure. */ +/* + * return bytes# of read on success or negative val on failure. Update fdnum + * with number of fds read. + */ int -read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, + int *fd_num) { struct iovec iov; struct msghdr msgh; - size_t fdsize = fd_num * sizeof(int); - char control[CMSG_SPACE(fdsize)]; + char control[CMSG_SPACE(max_fds * sizeof(int))]; struct cmsghdr *cmsg; int got_fds = 0; int ret; + *fd_num = 0; + memset(&msgh, 0, sizeof(msgh)); iov.iov_base = buf; iov.iov_len = buflen; @@ -131,13 +138,14 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) if ((cmsg->cmsg_level == SOL_SOCKET) && (cmsg->cmsg_type == SCM_RIGHTS)) { got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + *fd_num = got_fds; memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int)); break; } } /* Clear out unused file descriptors */ - while (got_fds < fd_num) + while (got_fds < max_fds) fds[got_fds++] = -1; return ret; @@ -720,7 +728,7 @@ rte_vhost_driver_get_protocol_features(const char *path, did = vsocket->vdpa_dev_id; vdpa_dev = rte_vdpa_get_device(did); if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) { - *protocol_features = VHOST_USER_PROTOCOL_FEATURES; + *protocol_features = vsocket->protocol_features; goto unlock_exit; } @@ -733,7 +741,7 @@ rte_vhost_driver_get_protocol_features(const char *path, goto unlock_exit; } - *protocol_features = VHOST_USER_PROTOCOL_FEATURES + *protocol_features = vsocket->protocol_features & vdpa_protocol_features; unlock_exit: @@ -852,11 +860,21 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->use_builtin_virtio_net = true; vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES; vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES; + vsocket->protocol_features = VHOST_USER_PROTOCOL_FEATURES; - /* Dequeue zero copy can't assure descriptors returned in order */ + /* + * Dequeue zero copy can't assure descriptors returned in order. + * Also, it requires that the guest memory is populated, which is + * not compatible with postcopy. 
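
Postcopy live migration support is opt-in per socket and, as the comment above notes, cannot be combined with dequeue zero copy. A hedged sketch of how an application could request it at registration time follows; the socket path is illustrative, and rte_vhost_driver_start() is the pre-existing call for starting a registered socket.

/* Hedged sketch: enabling postcopy support on a vhost-user socket. */
#include <rte_vhost.h>

static int
example_register_postcopy_socket(void)
{
	const char *path = "/tmp/vhost.sock";	/* illustrative path */

	/* Must not be combined with RTE_VHOST_USER_DEQUEUE_ZERO_COPY:
	 * the registration code below clears VHOST_USER_PROTOCOL_F_PAGEFAULT
	 * when zero copy is requested.
	 */
	if (rte_vhost_driver_register(path, RTE_VHOST_USER_POSTCOPY_SUPPORT) < 0)
		return -1;	/* also fails if built without RTE_LIBRTE_VHOST_POSTCOPY */

	return rte_vhost_driver_start(path);
}
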
+ */ if (vsocket->dequeue_zero_copy) { vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); + + RTE_LOG(INFO, VHOST_CONFIG, + "Dequeue zero copy requested, disabling postcopy support\n"); + vsocket->protocol_features &= + ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT); } if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) { @@ -864,6 +882,18 @@ rte_vhost_driver_register(const char *path, uint64_t flags) vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); } + if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) { + vsocket->protocol_features &= + ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT); + } else { +#ifndef RTE_LIBRTE_VHOST_POSTCOPY + RTE_LOG(ERR, VHOST_CONFIG, + "Postcopy requested but not compiled\n"); + ret = -1; + goto out_mutex; +#endif + } + if ((flags & RTE_VHOST_USER_CLIENT) != 0) { vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT); if (vsocket->reconnect && reconn_tid == 0) { diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c index c82fd437..c2c5dff1 100644 --- a/lib/librte_vhost/vdpa.c +++ b/lib/librte_vhost/vdpa.c @@ -113,3 +113,9 @@ rte_vdpa_get_device(int did) return vdpa_devices[did]; } + +int +rte_vdpa_get_device_num(void) +{ + return vdpa_device_num; +} diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 3c9be10a..70ac6bc9 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -8,6 +8,7 @@ #include <stdint.h> #include <stdlib.h> #ifdef RTE_LIBRTE_VHOST_NUMA +#include <numa.h> #include <numaif.h> #endif @@ -343,6 +344,7 @@ vhost_new_device(void) dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->slave_req_fd = -1; dev->vdpa_dev_id = -1; + dev->postcopy_ufd = -1; rte_spinlock_init(&dev->slave_req_lock); return i; @@ -480,7 +482,7 @@ rte_vhost_get_numa_node(int vid) int numa_node; int ret; - if (dev == NULL) + if (dev == NULL || numa_available() != 0) return -1; ret = get_mempolicy(&numa_node, NULL, 0, dev, @@ -646,12 +648,18 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id) } static inline void -vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable) +vhost_enable_notify_split(struct virtio_net *dev, + struct vhost_virtqueue *vq, int enable) { - if (enable) - vq->used->flags &= ~VRING_USED_F_NO_NOTIFY; - else - vq->used->flags |= VRING_USED_F_NO_NOTIFY; + if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) { + if (enable) + vq->used->flags &= ~VRING_USED_F_NO_NOTIFY; + else + vq->used->flags |= VRING_USED_F_NO_NOTIFY; + } else { + if (enable) + vhost_avail_event(vq) = vq->last_avail_idx; + } } static inline void @@ -660,8 +668,10 @@ vhost_enable_notify_packed(struct virtio_net *dev, { uint16_t flags; - if (!enable) + if (!enable) { vq->device_event->flags = VRING_EVENT_F_DISABLE; + return; + } flags = VRING_EVENT_F_ENABLE; if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) { @@ -689,7 +699,7 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable) if (vq_is_packed(dev)) vhost_enable_notify_packed(dev, vq, enable); else - vhost_enable_notify_split(vq, enable); + vhost_enable_notify_split(dev, vq, enable); return 0; } diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 760a09c0..b4abad30 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -284,6 +284,16 @@ struct guest_page { uint64_t size; }; +/* The possible results of a message handling function */ +enum vh_result { + /* Message handling failed */ + VH_RESULT_ERR = -1, + /* Message handling successful */ + VH_RESULT_OK = 0, + /* Message 
handling successful and reply prepared */ + VH_RESULT_REPLY = 1, +}; + /** * function prototype for the vhost backend to handler specific vhost user * messages prior to the master message handling @@ -292,17 +302,15 @@ struct guest_page { * vhost device id * @param msg * Message pointer. - * @param require_reply - * If the handler requires sending a reply, this varaible shall be written 1, - * otherwise 0. * @param skip_master * If the handler requires skipping the master message handling, this variable * shall be written 1, otherwise 0. * @return - * 0 on success, -1 on failure + * VH_RESULT_OK on success, VH_RESULT_REPLY on success with reply, + * VH_RESULT_ERR on failure */ -typedef int (*vhost_msg_pre_handle)(int vid, void *msg, - uint32_t *require_reply, uint32_t *skip_master); +typedef enum vh_result (*vhost_msg_pre_handle)(int vid, void *msg, + uint32_t *skip_master); /** * function prototype for the vhost backend to handler specific vhost user @@ -312,14 +320,11 @@ typedef int (*vhost_msg_pre_handle)(int vid, void *msg, * vhost device id * @param msg * Message pointer. - * @param require_reply - * If the handler requires sending a reply, this varaible shall be written 1, - * otherwise 0. * @return - * 0 on success, -1 on failure + * VH_RESULT_OK on success, VH_RESULT_REPLY on success with reply, + * VH_RESULT_ERR on failure */ -typedef int (*vhost_msg_post_handle)(int vid, void *msg, - uint32_t *require_reply); +typedef enum vh_result (*vhost_msg_post_handle)(int vid, void *msg); /** * pre and post vhost user message handlers @@ -363,6 +368,9 @@ struct virtio_net { int slave_req_fd; rte_spinlock_t slave_req_lock; + int postcopy_ufd; + int postcopy_listening; + /* * Device id to identify a specific backend device. * It's set to -1 for the default software implementation. 
@@ -648,6 +656,8 @@ vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, return __vhost_iova_to_vva(dev, vq, iova, len, perm); } +#define vhost_avail_event(vr) \ + (*(volatile uint16_t*)&(vr)->used->ring[(vr)->size]) #define vhost_used_event(vr) \ (*(volatile uint16_t*)&(vr)->avail->ring[(vr)->size]) diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index 57341ef8..9811a232 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -425,35 +425,34 @@ vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id) return 0; } -static int -vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply) +static enum vh_result +vhost_crypto_msg_post_handler(int vid, void *msg) { struct virtio_net *dev = get_device(vid); struct vhost_crypto *vcrypto; VhostUserMsg *vmsg = msg; - int ret = 0; + enum vh_result ret = VH_RESULT_OK; - if (dev == NULL || require_reply == NULL) { + if (dev == NULL) { VC_LOG_ERR("Invalid vid %i", vid); - return -EINVAL; + return VH_RESULT_ERR; } vcrypto = dev->extern_data; if (vcrypto == NULL) { VC_LOG_ERR("Cannot find required data, is it initialized?"); - return -ENOENT; + return VH_RESULT_ERR; } - *require_reply = 0; - if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) { vhost_crypto_create_sess(vcrypto, &vmsg->payload.crypto_session); - *require_reply = 1; - } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) - ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64); - else - ret = -EINVAL; + vmsg->fd_num = 0; + ret = VH_RESULT_REPLY; + } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) { + if (vhost_crypto_close_sess(vcrypto, vmsg->payload.u64)) + ret = VH_RESULT_ERR; + } return ret; } diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index a2d4c9ff..508228a3 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -24,13 +24,19 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <assert.h> #ifdef RTE_LIBRTE_VHOST_NUMA #include <numaif.h> #endif +#ifdef RTE_LIBRTE_VHOST_POSTCOPY +#include <linux/userfaultfd.h> +#endif #include <rte_common.h> #include <rte_malloc.h> @@ -69,8 +75,14 @@ static const char *vhost_message_str[VHOST_USER_MAX] = { [VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG", [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS", [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS", + [VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE", + [VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN", + [VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END", }; +static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg); +static int read_vhost_message(int sockfd, struct VhostUserMsg *msg); + static uint64_t get_blk_size(int fd) { @@ -120,6 +132,13 @@ vhost_backend_cleanup(struct virtio_net *dev) close(dev->slave_req_fd); dev->slave_req_fd = -1; } + + if (dev->postcopy_ufd >= 0) { + close(dev->postcopy_ufd); + dev->postcopy_ufd = -1; + } + + dev->postcopy_listening = 0; } /* @@ -127,51 +146,73 @@ vhost_backend_cleanup(struct virtio_net *dev) * the device hasn't been initialised. 
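
From here on, each VHOST_USER_* handler is converted to a common signature taking (pdev, msg, main_fd) and returning enum vh_result, with any reply data written back into msg. A purely illustrative handler following that convention is sketched below; vhost_user_example_handler and the payload field it inspects are hypothetical, only the calling convention is taken from this patch.

/* Illustrative handler following the new common signature; assumes the
 * in-tree vhost.h / vhost_user.h internal headers.
 */
#include "vhost.h"
#include "vhost_user.h"

static int
vhost_user_example_handler(struct virtio_net **pdev, struct VhostUserMsg *msg,
		int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;

	if (dev == NULL)
		return VH_RESULT_ERR;	/* caller logs the failure */

	if (msg->payload.u64 == 0)
		return VH_RESULT_OK;	/* handled, nothing to send back */

	/* To reply, fill the same msg in place and return VH_RESULT_REPLY;
	 * vhost_user_msg_handler() then calls send_vhost_reply() for us.
	 */
	msg->payload.u64 = 1;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return VH_RESULT_REPLY;
}
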
*/ static int -vhost_user_set_owner(void) +vhost_user_set_owner(struct virtio_net **pdev __rte_unused, + struct VhostUserMsg *msg __rte_unused, + int main_fd __rte_unused) { - return 0; + return VH_RESULT_OK; } static int -vhost_user_reset_owner(struct virtio_net *dev) +vhost_user_reset_owner(struct virtio_net **pdev, + struct VhostUserMsg *msg __rte_unused, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; vhost_destroy_device_notify(dev); cleanup_device(dev, 0); reset_device(dev); - return 0; + return VH_RESULT_OK; } /* * The features that we support are requested. */ -static uint64_t -vhost_user_get_features(struct virtio_net *dev) +static int +vhost_user_get_features(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; uint64_t features = 0; rte_vhost_driver_get_features(dev->ifname, &features); - return features; + + msg->payload.u64 = features; + msg->size = sizeof(msg->payload.u64); + msg->fd_num = 0; + + return VH_RESULT_REPLY; } /* * The queue number that we support are requested. */ -static uint32_t -vhost_user_get_queue_num(struct virtio_net *dev) +static int +vhost_user_get_queue_num(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; uint32_t queue_num = 0; rte_vhost_driver_get_queue_num(dev->ifname, &queue_num); - return queue_num; + + msg->payload.u64 = (uint64_t)queue_num; + msg->size = sizeof(msg->payload.u64); + msg->fd_num = 0; + + return VH_RESULT_REPLY; } /* * We receive the negotiated features supported by us and the virtio device. */ static int -vhost_user_set_features(struct virtio_net *dev, uint64_t features) +vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; + uint64_t features = msg->payload.u64; uint64_t vhost_features = 0; struct rte_vdpa_device *vdpa_dev; int did = -1; @@ -181,12 +222,12 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) RTE_LOG(ERR, VHOST_CONFIG, "(%d) received invalid negotiated features.\n", dev->vid); - return -1; + return VH_RESULT_ERR; } if (dev->flags & VIRTIO_DEV_RUNNING) { if (dev->features == features) - return 0; + return VH_RESULT_OK; /* * Error out if master tries to change features while device is @@ -197,7 +238,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) RTE_LOG(ERR, VHOST_CONFIG, "(%d) features changed while device is running.\n", dev->vid); - return -1; + return VH_RESULT_ERR; } if (dev->notify_ops->features_changed) @@ -242,16 +283,18 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) if (vdpa_dev && vdpa_dev->ops->set_features) vdpa_dev->ops->set_features(dev->vid); - return 0; + return VH_RESULT_OK; } /* * The virtio device sends us the size of the descriptor ring. 
*/ static int -vhost_user_set_vring_num(struct virtio_net *dev, - VhostUserMsg *msg) +vhost_user_set_vring_num(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; vq->size = msg->payload.state.num; @@ -264,7 +307,7 @@ vhost_user_set_vring_num(struct virtio_net *dev, if ((vq->size & (vq->size - 1)) || vq->size > 32768) { RTE_LOG(ERR, VHOST_CONFIG, "invalid virtqueue size %u\n", vq->size); - return -1; + return VH_RESULT_ERR; } if (dev->dequeue_zero_copy) { @@ -290,7 +333,7 @@ vhost_user_set_vring_num(struct virtio_net *dev, if (!vq->shadow_used_packed) { RTE_LOG(ERR, VHOST_CONFIG, "failed to allocate memory for shadow used ring.\n"); - return -1; + return VH_RESULT_ERR; } } else { @@ -300,7 +343,7 @@ vhost_user_set_vring_num(struct virtio_net *dev, if (!vq->shadow_used_split) { RTE_LOG(ERR, VHOST_CONFIG, "failed to allocate memory for shadow used ring.\n"); - return -1; + return VH_RESULT_ERR; } } @@ -310,10 +353,10 @@ vhost_user_set_vring_num(struct virtio_net *dev, if (!vq->batch_copy_elems) { RTE_LOG(ERR, VHOST_CONFIG, "failed to allocate memory for batching copy.\n"); - return -1; + return VH_RESULT_ERR; } - return 0; + return VH_RESULT_OK; } /* @@ -357,11 +400,13 @@ numa_realloc(struct virtio_net *dev, int index) memcpy(vq, old_vq, sizeof(*vq)); TAILQ_INIT(&vq->zmbuf_list); - new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size * - sizeof(struct zcopy_mbuf), 0, newnode); - if (new_zmbuf) { - rte_free(vq->zmbufs); - vq->zmbufs = new_zmbuf; + if (dev->dequeue_zero_copy) { + new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size * + sizeof(struct zcopy_mbuf), 0, newnode); + if (new_zmbuf) { + rte_free(vq->zmbufs); + vq->zmbufs = new_zmbuf; + } } if (vq_is_packed(dev)) { @@ -609,14 +654,15 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) * This function then converts these to our address space. */ static int -vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) +vhost_user_set_vring_addr(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; struct vhost_virtqueue *vq; struct vhost_vring_addr *addr = &msg->payload.addr; - struct virtio_net *dev = *pdev; if (dev->mem == NULL) - return -1; + return VH_RESULT_ERR; /* addr->index refers to the queue index. The txq 1, rxq is 0. */ vq = dev->virtqueue[msg->payload.addr.index]; @@ -633,27 +679,29 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { dev = translate_ring_addresses(dev, msg->payload.addr.index); if (!dev) - return -1; + return VH_RESULT_ERR; *pdev = dev; } - return 0; + return VH_RESULT_OK; } /* * The virtio device sends us the available ring last used index. 
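
The SET_VRING_NUM handler above rejects ring sizes that are not powers of two or exceed 32768. The standalone helper sketched here (slightly stricter, since it also rejects zero) illustrates why size & (size - 1) detects non-powers-of-two; the function name and test values are illustrative only.

/* Small standalone illustration of the ring-size validity test used above. */
#include <assert.h>
#include <stdint.h>
#include <stdbool.h>

static bool
vring_size_is_valid(uint32_t size)
{
	/* A power of two has a single bit set, so size & (size - 1) == 0. */
	return size != 0 && (size & (size - 1)) == 0 && size <= 32768;
}

int main(void)
{
	assert(vring_size_is_valid(256));
	assert(!vring_size_is_valid(100));	/* not a power of two */
	assert(!vring_size_is_valid(65536));	/* larger than 32768 */
	return 0;
}
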
*/ static int -vhost_user_set_vring_base(struct virtio_net *dev, - VhostUserMsg *msg) +vhost_user_set_vring_base(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; dev->virtqueue[msg->payload.state.index]->last_used_idx = msg->payload.state.num; dev->virtqueue[msg->payload.state.index]->last_avail_idx = msg->payload.state.num; - return 0; + return VH_RESULT_OK; } static int @@ -778,10 +826,11 @@ vhost_memory_changed(struct VhostUserMemory *new, } static int -vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) +vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd) { struct virtio_net *dev = *pdev; - struct VhostUserMemory memory = pmsg->payload.memory; + struct VhostUserMemory *memory = &msg->payload.memory; struct rte_vhost_mem_region *reg; void *mmap_addr; uint64_t mmap_size; @@ -791,20 +840,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) int populate; int fd; - if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) { + if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) { RTE_LOG(ERR, VHOST_CONFIG, - "too many memory regions (%u)\n", memory.nregions); - return -1; + "too many memory regions (%u)\n", memory->nregions); + return VH_RESULT_ERR; } - if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) { + if (dev->mem && !vhost_memory_changed(memory, dev->mem)) { RTE_LOG(INFO, VHOST_CONFIG, "(%d) memory regions not changed\n", dev->vid); - for (i = 0; i < memory.nregions; i++) - close(pmsg->fds[i]); + for (i = 0; i < memory->nregions; i++) + close(msg->fds[i]); - return 0; + return VH_RESULT_OK; } if (dev->mem) { @@ -828,30 +877,30 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) "(%d) failed to allocate memory " "for dev->guest_pages\n", dev->vid); - return -1; + return VH_RESULT_ERR; } } dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) + - sizeof(struct rte_vhost_mem_region) * memory.nregions, 0); + sizeof(struct rte_vhost_mem_region) * memory->nregions, 0); if (dev->mem == NULL) { RTE_LOG(ERR, VHOST_CONFIG, "(%d) failed to allocate memory for dev->mem\n", dev->vid); - return -1; + return VH_RESULT_ERR; } - dev->mem->nregions = memory.nregions; + dev->mem->nregions = memory->nregions; - for (i = 0; i < memory.nregions; i++) { - fd = pmsg->fds[i]; + for (i = 0; i < memory->nregions; i++) { + fd = msg->fds[i]; reg = &dev->mem->regions[i]; - reg->guest_phys_addr = memory.regions[i].guest_phys_addr; - reg->guest_user_addr = memory.regions[i].userspace_addr; - reg->size = memory.regions[i].memory_size; + reg->guest_phys_addr = memory->regions[i].guest_phys_addr; + reg->guest_user_addr = memory->regions[i].userspace_addr; + reg->size = memory->regions[i].memory_size; reg->fd = fd; - mmap_offset = memory.regions[i].mmap_offset; + mmap_offset = memory->regions[i].mmap_offset; /* Check for memory_size + mmap_offset overflow */ if (mmap_offset >= -reg->size) { @@ -920,6 +969,70 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) mmap_size, alignment, mmap_offset); + + if (dev->postcopy_listening) { + /* + * We haven't a better way right now than sharing + * DPDK's virtual address with Qemu, so that Qemu can + * retrieve the region offset when handling userfaults. 
+ */ + memory->regions[i].userspace_addr = + reg->host_user_addr; + } + } + if (dev->postcopy_listening) { + /* Send the addresses back to qemu */ + msg->fd_num = 0; + send_vhost_reply(main_fd, msg); + + /* Wait for qemu to acknolwedge it's got the addresses + * we've got to wait before we're allowed to generate faults. + */ + VhostUserMsg ack_msg; + if (read_vhost_message(main_fd, &ack_msg) <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to read qemu ack on postcopy set-mem-table\n"); + goto err_mmap; + } + if (ack_msg.request.master != VHOST_USER_SET_MEM_TABLE) { + RTE_LOG(ERR, VHOST_CONFIG, + "Bad qemu ack on postcopy set-mem-table (%d)\n", + ack_msg.request.master); + goto err_mmap; + } + + /* Now userfault register and we can use the memory */ + for (i = 0; i < memory->nregions; i++) { +#ifdef RTE_LIBRTE_VHOST_POSTCOPY + reg = &dev->mem->regions[i]; + struct uffdio_register reg_struct; + + /* + * Let's register all the mmap'ed area to ensure + * alignment on page boundary. + */ + reg_struct.range.start = + (uint64_t)(uintptr_t)reg->mmap_addr; + reg_struct.range.len = reg->mmap_size; + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, + ®_struct)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to register ufd for region %d: (ufd = %d) %s\n", + i, dev->postcopy_ufd, + strerror(errno)); + goto err_mmap; + } + RTE_LOG(INFO, VHOST_CONFIG, + "\t userfaultfd registered for range : %llx - %llx\n", + reg_struct.range.start, + reg_struct.range.start + + reg_struct.range.len - 1); +#else + goto err_mmap; +#endif + } } for (i = 0; i < dev->nr_vring; i++) { @@ -934,8 +1047,10 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) vring_invalidate(dev, vq); dev = translate_ring_addresses(dev, i); - if (!dev) - return -1; + if (!dev) { + dev = *pdev; + goto err_mmap; + } *pdev = dev; } @@ -943,13 +1058,13 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg) dump_guest_pages(dev); - return 0; + return VH_RESULT_OK; err_mmap: free_mem_region(dev); rte_free(dev->mem); dev->mem = NULL; - return -1; + return VH_RESULT_ERR; } static bool @@ -991,17 +1106,19 @@ virtio_is_ready(struct virtio_net *dev) return 1; } -static void -vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) +static int +vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; struct vhost_vring_file file; struct vhost_virtqueue *vq; - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) file.fd = VIRTIO_INVALID_EVENTFD; else - file.fd = pmsg->fds[0]; + file.fd = msg->fds[0]; RTE_LOG(INFO, VHOST_CONFIG, "vring call idx:%d file:%d\n", file.index, file.fd); @@ -1010,27 +1127,41 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg) close(vq->callfd); vq->callfd = file.fd; + + return VH_RESULT_OK; } -static void -vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg) +static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + if (!(msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg->fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + + return VH_RESULT_OK; +} + +static int +vhost_user_set_vring_kick(struct virtio_net **pdev, 
struct VhostUserMsg *msg, + int main_fd __rte_unused) +{ + struct virtio_net *dev = *pdev; struct vhost_vring_file file; struct vhost_virtqueue *vq; - struct virtio_net *dev = *pdev; - file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) file.fd = VIRTIO_INVALID_EVENTFD; else - file.fd = pmsg->fds[0]; + file.fd = msg->fds[0]; RTE_LOG(INFO, VHOST_CONFIG, "vring kick idx:%d file:%d\n", file.index, file.fd); /* Interpret ring addresses only when ring is started. */ dev = translate_ring_addresses(dev, file.index); if (!dev) - return; + return VH_RESULT_ERR; *pdev = dev; @@ -1047,6 +1178,8 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg) if (vq->kickfd >= 0) close(vq->kickfd); vq->kickfd = file.fd; + + return VH_RESULT_OK; } static void @@ -1069,9 +1202,11 @@ free_zmbufs(struct vhost_virtqueue *vq) * when virtio is stopped, qemu will send us the GET_VRING_BASE message. */ static int -vhost_user_get_vring_base(struct virtio_net *dev, - VhostUserMsg *msg) +vhost_user_get_vring_base(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; /* We have to stop the queue (virtio) if it is running. */ @@ -1114,7 +1249,10 @@ vhost_user_get_vring_base(struct virtio_net *dev, rte_free(vq->batch_copy_elems); vq->batch_copy_elems = NULL; - return 0; + msg->size = sizeof(msg->payload.state); + msg->fd_num = 0; + + return VH_RESULT_REPLY; } /* @@ -1122,9 +1260,11 @@ vhost_user_get_vring_base(struct virtio_net *dev, * enable the virtio queue pair. 
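
The SET_VRING_KICK and SET_VRING_CALL handlers above split msg->payload.u64 into a ring index and an optional eventfd taken from msg->fds[0]. The standalone sketch below mirrors that decoding; the EXAMPLE_* mask values are assumed to match the VHOST_USER_VRING_IDX_MASK / VHOST_USER_VRING_NOFD_MASK definitions in vhost_user.h (index in the low byte, no-fd flag in bit 8), and the test value is illustrative.

/* Standalone sketch of the kick/call fd decoding shown above. */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_VRING_IDX_MASK  0xffULL		/* assumed mask values */
#define EXAMPLE_VRING_NOFD_MASK (0x1ULL << 8)
#define EXAMPLE_INVALID_EVENTFD (-1)

static void
decode_vring_file(uint64_t payload_u64, int fd_from_msg, int *index, int *fd)
{
	*index = (int)(payload_u64 & EXAMPLE_VRING_IDX_MASK);
	if (payload_u64 & EXAMPLE_VRING_NOFD_MASK)
		*fd = EXAMPLE_INVALID_EVENTFD;	/* master passed no descriptor */
	else
		*fd = fd_from_msg;		/* first fd from SCM_RIGHTS */
}

int main(void)
{
	int index, fd;

	decode_vring_file(0x102, 7, &index, &fd);
	printf("idx %d fd %d\n", index, fd);	/* idx 2, fd -1 (NOFD bit set) */
	return 0;
}
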
*/ static int -vhost_user_set_vring_enable(struct virtio_net *dev, - VhostUserMsg *msg) +vhost_user_set_vring_enable(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; int enable = (int)msg->payload.state.num; int index = (int)msg->payload.state.index; struct rte_vdpa_device *vdpa_dev; @@ -1145,13 +1285,15 @@ vhost_user_set_vring_enable(struct virtio_net *dev, dev->virtqueue[index]->enabled = enable; - return 0; + return VH_RESULT_OK; } -static void -vhost_user_get_protocol_features(struct virtio_net *dev, - struct VhostUserMsg *msg) +static int +vhost_user_get_protocol_features(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; uint64_t features, protocol_features; rte_vhost_driver_get_features(dev->ifname, &features); @@ -1168,35 +1310,53 @@ vhost_user_get_protocol_features(struct virtio_net *dev, msg->payload.u64 = protocol_features; msg->size = sizeof(msg->payload.u64); + msg->fd_num = 0; + + return VH_RESULT_REPLY; } -static void -vhost_user_set_protocol_features(struct virtio_net *dev, - uint64_t protocol_features) +static int +vhost_user_set_protocol_features(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) { - if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) - return; + struct virtio_net *dev = *pdev; + uint64_t protocol_features = msg->payload.u64; + uint64_t slave_protocol_features = 0; + + rte_vhost_driver_get_protocol_features(dev->ifname, + &slave_protocol_features); + if (protocol_features & ~slave_protocol_features) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%d) received invalid protocol features.\n", + dev->vid); + return VH_RESULT_ERR; + } dev->protocol_features = protocol_features; + + return VH_RESULT_OK; } static int -vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) +vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; int fd = msg->fds[0]; uint64_t size, off; void *addr; if (fd < 0) { RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); - return -1; + return VH_RESULT_ERR; } if (msg->size != sizeof(VhostUserLog)) { RTE_LOG(ERR, VHOST_CONFIG, "invalid log base msg size: %"PRId32" != %d\n", msg->size, (int)sizeof(VhostUserLog)); - return -1; + return VH_RESULT_ERR; } size = msg->payload.log.mmap_size; @@ -1207,7 +1367,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) RTE_LOG(ERR, VHOST_CONFIG, "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", off, size); - return -1; + return VH_RESULT_ERR; } RTE_LOG(INFO, VHOST_CONFIG, @@ -1222,7 +1382,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); - return -1; + return VH_RESULT_ERR; } /* @@ -1236,7 +1396,24 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) dev->log_base = dev->log_addr + off; dev->log_size = size; - return 0; + /* + * The spec is not clear about it (yet), but QEMU doesn't expect + * any payload in the reply. 
+ */ + msg->size = 0; + msg->fd_num = 0; + + return VH_RESULT_REPLY; +} + +static int vhost_user_set_log_fd(struct virtio_net **pdev __rte_unused, + struct VhostUserMsg *msg, + int main_fd __rte_unused) +{ + close(msg->fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + + return VH_RESULT_OK; } /* @@ -1248,8 +1425,10 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. */ static int -vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) +vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; uint8_t *mac = (uint8_t *)&msg->payload.u64; struct rte_vdpa_device *vdpa_dev; int did = -1; @@ -1273,40 +1452,44 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg) if (vdpa_dev && vdpa_dev->ops->migration_done) vdpa_dev->ops->migration_done(dev->vid); - return 0; + return VH_RESULT_OK; } static int -vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg) +vhost_user_net_set_mtu(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; if (msg->payload.u64 < VIRTIO_MIN_MTU || msg->payload.u64 > VIRTIO_MAX_MTU) { RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n", msg->payload.u64); - return -1; + return VH_RESULT_ERR; } dev->mtu = msg->payload.u64; - return 0; + return VH_RESULT_OK; } static int -vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg) +vhost_user_set_req_fd(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { + struct virtio_net *dev = *pdev; int fd = msg->fds[0]; if (fd < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Invalid file descriptor for slave channel (%d)\n", fd); - return -1; + return VH_RESULT_ERR; } dev->slave_req_fd = fd; - return 0; + return VH_RESULT_OK; } static int @@ -1359,7 +1542,8 @@ is_vring_iotlb_invalidate(struct vhost_virtqueue *vq, } static int -vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) +vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) { struct virtio_net *dev = *pdev; struct vhost_iotlb_msg *imsg = &msg->payload.iotlb; @@ -1371,7 +1555,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) len = imsg->size; vva = qva_to_vva(dev, imsg->uaddr, &len); if (!vva) - return -1; + return VH_RESULT_ERR; for (i = 0; i < dev->nr_vring; i++) { struct vhost_virtqueue *vq = dev->virtqueue[i]; @@ -1397,12 +1581,118 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg) default: RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n", imsg->type); - return -1; + return VH_RESULT_ERR; } - return 0; + return VH_RESULT_OK; } +static int +vhost_user_set_postcopy_advise(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd __rte_unused) +{ + struct virtio_net *dev = *pdev; +#ifdef RTE_LIBRTE_VHOST_POSTCOPY + struct uffdio_api api_struct; + + dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + + if (dev->postcopy_ufd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n", + strerror(errno)); + return VH_RESULT_ERR; + } + api_struct.api = UFFD_API; + api_struct.features = 0; + if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) { + RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n", + strerror(errno)); + close(dev->postcopy_ufd); + dev->postcopy_ufd = 
-1; + return VH_RESULT_ERR; + } + msg->fds[0] = dev->postcopy_ufd; + msg->fd_num = 1; + + return VH_RESULT_REPLY; +#else + dev->postcopy_ufd = -1; + msg->fd_num = 0; + + return VH_RESULT_ERR; +#endif +} + +static int +vhost_user_set_postcopy_listen(struct virtio_net **pdev, + struct VhostUserMsg *msg __rte_unused, + int main_fd __rte_unused) +{ + struct virtio_net *dev = *pdev; + + if (dev->mem && dev->mem->nregions) { + RTE_LOG(ERR, VHOST_CONFIG, + "Regions already registered at postcopy-listen\n"); + return VH_RESULT_ERR; + } + dev->postcopy_listening = 1; + + return VH_RESULT_OK; +} + +static int +vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg, + int main_fd __rte_unused) +{ + struct virtio_net *dev = *pdev; + + dev->postcopy_listening = 0; + if (dev->postcopy_ufd >= 0) { + close(dev->postcopy_ufd); + dev->postcopy_ufd = -1; + } + + msg->payload.u64 = 0; + msg->size = sizeof(msg->payload.u64); + msg->fd_num = 0; + + return VH_RESULT_REPLY; +} + +typedef int (*vhost_message_handler_t)(struct virtio_net **pdev, + struct VhostUserMsg *msg, + int main_fd); +static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = NULL, + [VHOST_USER_GET_FEATURES] = vhost_user_get_features, + [VHOST_USER_SET_FEATURES] = vhost_user_set_features, + [VHOST_USER_SET_OWNER] = vhost_user_set_owner, + [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner, + [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table, + [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base, + [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd, + [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num, + [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr, + [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base, + [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base, + [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick, + [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call, + [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err, + [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features, + [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features, + [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num, + [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable, + [VHOST_USER_SEND_RARP] = vhost_user_send_rarp, + [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu, + [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd, + [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg, + [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise, + [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen, + [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end, +}; + + /* return bytes# of read on success or negative val on failure. 
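
The POSTCOPY_ADVISE and SET_MEM_TABLE paths above open a userfaultfd, negotiate UFFDIO_API and then register each mmap'ed region with UFFDIO_REGISTER. The self-contained sketch below reproduces that kernel handshake on an anonymous test mapping rather than guest memory; it assumes a Linux build with <linux/userfaultfd.h> available and is not part of this patch.

/* Minimal userfaultfd handshake, mirroring the ADVISE/SET_MEM_TABLE steps
 * above but on an anonymous test mapping instead of guest memory.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>

int main(void)
{
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	struct uffdio_register reg;
	size_t len = 2 * 1024 * 1024;
	void *area;
	int ufd;

	ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (ufd == -1) {
		fprintf(stderr, "userfaultfd: %s\n", strerror(errno));
		return 1;
	}

	if (ioctl(ufd, UFFDIO_API, &api) == -1) {	/* negotiate the API */
		fprintf(stderr, "UFFDIO_API: %s\n", strerror(errno));
		return 1;
	}

	area = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED)
		return 1;

	/* Register the whole mapping for missing-page notifications, as the
	 * set_mem_table handler does for each guest memory region.
	 */
	memset(&reg, 0, sizeof(reg));
	reg.range.start = (unsigned long)area;
	reg.range.len = len;
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(ufd, UFFDIO_REGISTER, &reg) == -1) {
		fprintf(stderr, "UFFDIO_REGISTER: %s\n", strerror(errno));
		return 1;
	}

	close(ufd);
	return 0;
}
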
*/ static int read_vhost_message(int sockfd, struct VhostUserMsg *msg) @@ -1410,7 +1700,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) int ret; ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, - msg->fds, VHOST_MEMORY_MAX_NREGIONS); + msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num); if (ret <= 0) return ret; @@ -1434,13 +1724,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) } static int -send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num) +send_vhost_message(int sockfd, struct VhostUserMsg *msg) { if (!msg) return 0; return send_fd_message(sockfd, (char *)msg, - VHOST_USER_HDR_SIZE + msg->size, fds, fd_num); + VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num); } static int @@ -1454,19 +1744,18 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg) msg->flags |= VHOST_USER_VERSION; msg->flags |= VHOST_USER_REPLY_MASK; - return send_vhost_message(sockfd, msg, NULL, 0); + return send_vhost_message(sockfd, msg); } static int -send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg, - int *fds, int fd_num) +send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg) { int ret; if (msg->flags & VHOST_USER_NEED_REPLY) rte_spinlock_lock(&dev->slave_req_lock); - ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num); + ret = send_vhost_message(dev->slave_req_fd, msg); if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY)) rte_spinlock_unlock(&dev->slave_req_lock); @@ -1477,7 +1766,8 @@ send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg, * Allocate a queue pair if it hasn't been allocated yet */ static int -vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg) +vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + struct VhostUserMsg *msg) { uint16_t vring_idx; @@ -1555,6 +1845,7 @@ vhost_user_msg_handler(int vid, int fd) int ret; int unlock_required = 0; uint32_t skip_master = 0; + int request; dev = get_device(vid); if (dev == NULL) @@ -1633,132 +1924,54 @@ vhost_user_msg_handler(int vid, int fd) } if (dev->extern_ops.pre_msg_handle) { - uint32_t need_reply; - ret = (*dev->extern_ops.pre_msg_handle)(dev->vid, - (void *)&msg, &need_reply, &skip_master); - if (ret < 0) + (void *)&msg, &skip_master); + if (ret == VH_RESULT_ERR) goto skip_to_reply; - - if (need_reply) + else if (ret == VH_RESULT_REPLY) send_vhost_reply(fd, &msg); if (skip_master) goto skip_to_post_handle; } - switch (msg.request.master) { - case VHOST_USER_GET_FEATURES: - msg.payload.u64 = vhost_user_get_features(dev); - msg.size = sizeof(msg.payload.u64); - send_vhost_reply(fd, &msg); - break; - case VHOST_USER_SET_FEATURES: - ret = vhost_user_set_features(dev, msg.payload.u64); - if (ret) - return -1; - break; - - case VHOST_USER_GET_PROTOCOL_FEATURES: - vhost_user_get_protocol_features(dev, &msg); - send_vhost_reply(fd, &msg); - break; - case VHOST_USER_SET_PROTOCOL_FEATURES: - vhost_user_set_protocol_features(dev, msg.payload.u64); - break; - - case VHOST_USER_SET_OWNER: - vhost_user_set_owner(); - break; - case VHOST_USER_RESET_OWNER: - vhost_user_reset_owner(dev); - break; - - case VHOST_USER_SET_MEM_TABLE: - ret = vhost_user_set_mem_table(&dev, &msg); - break; - - case VHOST_USER_SET_LOG_BASE: - vhost_user_set_log_base(dev, &msg); - - /* it needs a reply */ - msg.size = sizeof(msg.payload.u64); - send_vhost_reply(fd, &msg); - break; - case VHOST_USER_SET_LOG_FD: - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); - break; - - 
case VHOST_USER_SET_VRING_NUM: - vhost_user_set_vring_num(dev, &msg); - break; - case VHOST_USER_SET_VRING_ADDR: - vhost_user_set_vring_addr(&dev, &msg); - break; - case VHOST_USER_SET_VRING_BASE: - vhost_user_set_vring_base(dev, &msg); - break; - - case VHOST_USER_GET_VRING_BASE: - vhost_user_get_vring_base(dev, &msg); - msg.size = sizeof(msg.payload.state); - send_vhost_reply(fd, &msg); - break; - - case VHOST_USER_SET_VRING_KICK: - vhost_user_set_vring_kick(&dev, &msg); - break; - case VHOST_USER_SET_VRING_CALL: - vhost_user_set_vring_call(dev, &msg); - break; - - case VHOST_USER_SET_VRING_ERR: - if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) - close(msg.fds[0]); - RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); - break; - - case VHOST_USER_GET_QUEUE_NUM: - msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev); - msg.size = sizeof(msg.payload.u64); - send_vhost_reply(fd, &msg); - break; - - case VHOST_USER_SET_VRING_ENABLE: - vhost_user_set_vring_enable(dev, &msg); - break; - case VHOST_USER_SEND_RARP: - vhost_user_send_rarp(dev, &msg); - break; - - case VHOST_USER_NET_SET_MTU: - ret = vhost_user_net_set_mtu(dev, &msg); - break; - - case VHOST_USER_SET_SLAVE_REQ_FD: - ret = vhost_user_set_req_fd(dev, &msg); - break; - - case VHOST_USER_IOTLB_MSG: - ret = vhost_user_iotlb_msg(&dev, &msg); - break; + request = msg.request.master; + if (request > VHOST_USER_NONE && request < VHOST_USER_MAX) { + if (!vhost_message_handlers[request]) + goto skip_to_post_handle; + ret = vhost_message_handlers[request](&dev, &msg, fd); - default: - ret = -1; - break; + switch (ret) { + case VH_RESULT_ERR: + RTE_LOG(ERR, VHOST_CONFIG, + "Processing %s failed.\n", + vhost_message_str[request]); + break; + case VH_RESULT_OK: + RTE_LOG(DEBUG, VHOST_CONFIG, + "Processing %s succeeded.\n", + vhost_message_str[request]); + break; + case VH_RESULT_REPLY: + RTE_LOG(DEBUG, VHOST_CONFIG, + "Processing %s succeeded and needs reply.\n", + vhost_message_str[request]); + send_vhost_reply(fd, &msg); + break; + } + } else { + RTE_LOG(ERR, VHOST_CONFIG, + "Requested invalid message type %d.\n", request); + ret = VH_RESULT_ERR; } skip_to_post_handle: - if (dev->extern_ops.post_msg_handle) { - uint32_t need_reply; - + if (ret != VH_RESULT_ERR && dev->extern_ops.post_msg_handle) { ret = (*dev->extern_ops.post_msg_handle)( - dev->vid, (void *)&msg, &need_reply); - if (ret < 0) + dev->vid, (void *)&msg); + if (ret == VH_RESULT_ERR) goto skip_to_reply; - - if (need_reply) + else if (ret == VH_RESULT_REPLY) send_vhost_reply(fd, &msg); } @@ -1766,10 +1979,20 @@ skip_to_reply: if (unlock_required) vhost_user_unlock_all_queue_pairs(dev); + /* + * If the request required a reply that was already sent, + * this optional reply-ack won't be sent as the + * VHOST_USER_NEED_REPLY was cleared in send_vhost_reply(). 
+ */ if (msg.flags & VHOST_USER_NEED_REPLY) { - msg.payload.u64 = !!ret; + msg.payload.u64 = ret == VH_RESULT_ERR; msg.size = sizeof(msg.payload.u64); + msg.fd_num = 0; send_vhost_reply(fd, &msg); + } else if (ret == VH_RESULT_ERR) { + RTE_LOG(ERR, VHOST_CONFIG, + "vhost message handling failed.\n"); + return -1; } if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) { @@ -1805,9 +2028,9 @@ skip_to_reply: } static int process_slave_message_reply(struct virtio_net *dev, - const VhostUserMsg *msg) + const struct VhostUserMsg *msg) { - VhostUserMsg msg_reply; + struct VhostUserMsg msg_reply; int ret; if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) @@ -1848,7 +2071,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) }, }; - ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0); + ret = send_vhost_message(dev->slave_req_fd, &msg); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to send IOTLB miss message (%d)\n", @@ -1864,8 +2087,6 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, uint64_t offset, uint64_t size) { - int *fdp = NULL; - size_t fd_num = 0; int ret; struct VhostUserMsg msg = { .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, @@ -1881,11 +2102,11 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, if (fd < 0) msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK; else { - fdp = &fd; - fd_num = 1; + msg.fds[0] = fd; + msg.fd_num = 1; } - ret = send_vhost_slave_message(dev, &msg, fdp, fd_num); + ret = send_vhost_slave_message(dev, &msg); if (ret < 0) { RTE_LOG(ERR, VHOST_CONFIG, "Failed to set host notifier (%d)\n", ret); diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h index 42166adf..dc97be84 100644 --- a/lib/librte_vhost/vhost_user.h +++ b/lib/librte_vhost/vhost_user.h @@ -22,7 +22,8 @@ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \ (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \ - (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)) + (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \ + (1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT)) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -50,7 +51,10 @@ typedef enum VhostUserRequest { VHOST_USER_IOTLB_MSG = 22, VHOST_USER_CRYPTO_CREATE_SESS = 26, VHOST_USER_CRYPTO_CLOSE_SESS = 27, - VHOST_USER_MAX = 28 + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_MAX = 31 } VhostUserRequest; typedef enum VhostUserSlaveRequest { @@ -132,6 +136,7 @@ typedef struct VhostUserMsg { VhostUserVringArea area; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num; } __attribute((packed)) VhostUserMsg; #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) @@ -146,7 +151,8 @@ int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm); int vhost_user_host_notifier_ctrl(int vid, bool enable); /* socket.c */ -int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); +int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, + int *fd_num); int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num); #endif diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 99c7afc8..8ad30c94 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -122,7 +122,7 @@ flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq) static __rte_always_inline void 
update_shadow_used_ring_split(struct vhost_virtqueue *vq, - uint16_t desc_idx, uint16_t len) + uint16_t desc_idx, uint32_t len) { uint16_t i = vq->shadow_used_idx++; @@ -186,7 +186,7 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, static __rte_always_inline void update_shadow_used_ring_packed(struct vhost_virtqueue *vq, - uint16_t desc_idx, uint16_t len, uint16_t count) + uint16_t desc_idx, uint32_t len, uint16_t count) { uint16_t i = vq->shadow_used_idx++; @@ -329,7 +329,7 @@ static __rte_always_inline int fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t avail_idx, uint16_t *vec_idx, struct buf_vector *buf_vec, uint16_t *desc_chain_head, - uint16_t *desc_chain_len, uint8_t perm) + uint32_t *desc_chain_len, uint8_t perm) { uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; uint16_t vec_id = *vec_idx; @@ -409,7 +409,7 @@ reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t max_tries, tries = 0; uint16_t head_idx = 0; - uint16_t len = 0; + uint32_t len = 0; *num_buffers = 0; cur_idx = vq->last_avail_idx; @@ -452,7 +452,7 @@ static __rte_always_inline int fill_vec_buf_packed_indirect(struct virtio_net *dev, struct vhost_virtqueue *vq, struct vring_packed_desc *desc, uint16_t *vec_idx, - struct buf_vector *buf_vec, uint16_t *len, uint8_t perm) + struct buf_vector *buf_vec, uint32_t *len, uint8_t perm) { uint16_t i; uint32_t nr_descs; @@ -508,7 +508,7 @@ static __rte_always_inline int fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t avail_idx, uint16_t *desc_count, struct buf_vector *buf_vec, uint16_t *vec_idx, - uint16_t *buf_id, uint16_t *len, uint8_t perm) + uint16_t *buf_id, uint32_t *len, uint8_t perm) { bool wrap_counter = vq->avail_wrap_counter; struct vring_packed_desc *descs = vq->desc_packed; @@ -521,6 +521,7 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, return -1; *desc_count = 0; + *len = 0; while (1) { if (unlikely(vec_id >= BUF_VECTOR_MAX)) @@ -573,7 +574,7 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t max_tries, tries = 0; uint16_t buf_id = 0; - uint16_t len = 0; + uint32_t len = 0; uint16_t desc_count; *num_buffers = 0; @@ -888,6 +889,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count) { struct vhost_virtqueue *vq; + uint32_t nb_tx = 0; VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) { @@ -915,9 +917,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, goto out; if (vq_is_packed(dev)) - count = virtio_dev_rx_packed(dev, vq, pkts, count); + nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count); else - count = virtio_dev_rx_split(dev, vq, pkts, count); + nb_tx = virtio_dev_rx_split(dev, vq, pkts, count); out: if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) @@ -926,7 +928,7 @@ out: out_access_unlock: rte_spinlock_unlock(&vq->access_lock); - return count; + return nb_tx; } uint16_t @@ -1358,8 +1360,10 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, } } - flush_shadow_used_ring_split(dev, vq); - vhost_vring_call_split(dev, vq); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } } rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); @@ -1378,7 +1382,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, for (i = 0; i < count; i++) 
{ struct buf_vector buf_vec[BUF_VECTOR_MAX]; - uint16_t head_idx, dummy_len; + uint16_t head_idx; + uint32_t dummy_len; uint16_t nr_vec = 0; int err; @@ -1437,8 +1442,10 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, do_data_copy_dequeue(vq); if (unlikely(i < count)) vq->shadow_used_idx = i; - flush_shadow_used_ring_split(dev, vq); - vhost_vring_call_split(dev, vq); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_split(dev, vq); + vhost_vring_call_split(dev, vq); + } } return i; @@ -1473,8 +1480,10 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, } } - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); + } } VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); @@ -1485,7 +1494,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, for (i = 0; i < count; i++) { struct buf_vector buf_vec[BUF_VECTOR_MAX]; - uint16_t buf_id, dummy_len; + uint16_t buf_id; + uint32_t dummy_len; uint16_t desc_count, nr_vec = 0; int err; @@ -1551,8 +1561,10 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, do_data_copy_dequeue(vq); if (unlikely(i < count)) vq->shadow_used_idx = i; - flush_shadow_used_ring_packed(dev, vq); - vhost_vring_call_packed(dev, vq); + if (likely(vq->shadow_used_idx)) { + flush_shadow_used_ring_packed(dev, vq); + vhost_vring_call_packed(dev, vq); + } } return i; diff --git a/lib/meson.build b/lib/meson.build index eb91f100..bb7f443f 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -9,13 +9,14 @@ # given as a dep, no need to mention ring. This is especially true for the # core libs which are widely reused, so their deps are kept to a minimum. 
libraries = [ 'compat', # just a header, used for versioning - 'kvargs', + 'cmdline', # ethdev depends on cmdline for parsing functions + 'kvargs', # eal depends on kvargs 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core 'metrics', # bitrate/latency stats depends on this 'hash', # efd depends on this 'timer', # eventdev depends on this 'acl', 'bbdev', 'bitratestats', 'cfgfile', - 'cmdline', 'compressdev', 'cryptodev', + 'compressdev', 'cryptodev', 'distributor', 'efd', 'eventdev', 'gro', 'gso', 'ip_frag', 'jobstats', 'kni', 'latencystats', 'lpm', 'member', @@ -24,12 +25,18 @@ libraries = [ 'compat', # just a header, used for versioning # add pkt framework libs which use other libs from above 'port', 'table', 'pipeline', # flow_classify lib depends on pkt framework table lib - 'flow_classify', 'bpf'] + 'flow_classify', 'bpf', 'telemetry'] default_cflags = machine_args if cc.has_argument('-Wno-format-truncation') default_cflags += '-Wno-format-truncation' endif + +enabled_libs = [] # used to print summary at the end + +# -D_GNU_SOURCE unconditionally +default_cflags += '-D_GNU_SOURCE' + foreach l:libraries build = true name = l @@ -45,18 +52,17 @@ foreach l:libraries # use "deps" for internal DPDK dependencies, and "ext_deps" for # external package/library requirements ext_deps = [] - deps = ['eal'] # eal is standard dependency except for itself - if l == 'kvargs' - deps = [] - endif - if l == 'eal' - deps = ['kvargs'] + deps = [] + # eal is standard dependency once built + if dpdk_conf.has('RTE_LIBRTE_EAL') + deps += ['eal'] endif dir_name = 'librte_' + l subdir(dir_name) if build + enabled_libs += name dpdk_conf.set('RTE_LIBRTE_' + name.to_upper(), 1) install_headers(headers) @@ -87,10 +93,8 @@ foreach l:libraries lib_version = '@0@.1'.format(version) so_version = '@0@'.format(version) else - prj_ver = meson.project_version().split('.') - lib_version = '@0@.@1@'.format( - prj_ver.get(0), prj_ver.get(1)) - so_version = lib_version + lib_version = major_version + so_version = major_version endif # first build static lib @@ -126,6 +130,7 @@ foreach l:libraries dependencies: shared_deps) dpdk_libraries = [shared_lib] + dpdk_libraries + dpdk_static_libraries = [static_lib] + dpdk_static_libraries endif # sources.length() > 0 set_variable('shared_' + libname, shared_dep) |