author    Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:55:37 +0100
committer Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:57:39 +0100
commit    ae1479de3027a4a0f68d90bf65ac6ff2b862b837 (patch)
tree      5dbca675b6717b6b61abbe1a9583d5166fed942b /lib
parent    8cee230dd1f0f9f31f4b0339c671d0b1ee14e111 (diff)
parent    b63264c8342e6a1b6971c79550d2af2024b6a4de (diff)
Merge branch 'upstream' into 18.08.x
Change-Id: Ifbda2d554199dd4d11e01f0090881b5f0103ae12
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r--  lib/Makefile | 5
-rw-r--r--  lib/librte_bbdev/rte_bbdev.c | 4
-rw-r--r--  lib/librte_bitratestats/rte_bitrate.c | 6
-rw-r--r--  lib/librte_bpf/bpf.c | 5
-rw-r--r--  lib/librte_bpf/bpf_def.h | 5
-rw-r--r--  lib/librte_bpf/bpf_exec.c | 2
-rw-r--r--  lib/librte_bpf/bpf_impl.h | 14
-rw-r--r--  lib/librte_bpf/bpf_jit_x86.c | 17
-rw-r--r--  lib/librte_bpf/bpf_load.c | 49
-rw-r--r--  lib/librte_bpf/bpf_load_elf.c | 4
-rw-r--r--  lib/librte_bpf/bpf_validate.c | 1136
-rw-r--r--  lib/librte_bpf/meson.build | 2
-rw-r--r--  lib/librte_bpf/rte_bpf.h | 21
-rw-r--r--  lib/librte_cmdline/cmdline_parse.c | 9
-rw-r--r--  lib/librte_compat/Makefile | 33
-rw-r--r--  lib/librte_compressdev/rte_comp.c | 12
-rw-r--r--  lib/librte_compressdev/rte_comp.h | 35
-rw-r--r--  lib/librte_compressdev/rte_compressdev.c | 5
-rw-r--r--  lib/librte_cryptodev/Makefile | 1
-rw-r--r--  lib/librte_cryptodev/meson.build | 3
-rw-r--r--  lib/librte_cryptodev/rte_crypto.h | 88
-rw-r--r--  lib/librte_cryptodev/rte_crypto_asym.h | 496
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.c | 352
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.h | 391
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.c | 12
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.h | 155
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_version.map | 22
-rw-r--r--  lib/librte_eal/bsdapp/eal/Makefile | 5
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal.c | 76
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_alarm.c | 299
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_alarm_private.h | 19
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 4
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_interrupts.c | 464
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_memory.c | 265
-rw-r--r--  lib/librte_eal/bsdapp/eal/meson.build | 2
-rw-r--r--  lib/librte_eal/common/Makefile | 4
-rw-r--r--  lib/librte_eal/common/eal_common_class.c | 64
-rw-r--r--  lib/librte_eal/common/eal_common_dev.c | 247
-rw-r--r--  lib/librte_eal/common/eal_common_devargs.c | 193
-rw-r--r--  lib/librte_eal/common/eal_common_fbarray.c | 564
-rw-r--r--  lib/librte_eal/common/eal_common_log.c | 4
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c | 504
-rw-r--r--  lib/librte_eal/common/eal_common_memzone.c | 70
-rw-r--r--  lib/librte_eal/common/eal_common_options.c | 78
-rw-r--r--  lib/librte_eal/common/eal_common_proc.c | 222
-rw-r--r--  lib/librte_eal/common/eal_common_thread.c | 9
-rw-r--r--  lib/librte_eal/common/eal_common_uuid.c | 193
-rw-r--r--  lib/librte_eal/common/eal_filesystem.h | 10
-rw-r--r--  lib/librte_eal/common/eal_internal_cfg.h | 6
-rw-r--r--  lib/librte_eal/common/eal_options.h | 4
-rw-r--r--  lib/librte_eal/common/eal_private.h | 46
-rw-r--r--  lib/librte_eal/common/include/rte_bitmap.h | 8
-rw-r--r--  lib/librte_eal/common/include/rte_bus.h | 4
-rw-r--r--  lib/librte_eal/common/include/rte_class.h | 134
-rw-r--r--  lib/librte_eal/common/include/rte_common.h | 29
-rw-r--r--  lib/librte_eal/common/include/rte_dev.h | 103
-rw-r--r--  lib/librte_eal/common/include/rte_devargs.h | 53
-rw-r--r--  lib/librte_eal/common/include/rte_eal.h | 16
-rw-r--r--  lib/librte_eal/common/include/rte_fbarray.h | 114
-rw-r--r--  lib/librte_eal/common/include/rte_memory.h | 54
-rw-r--r--  lib/librte_eal/common/include/rte_memzone.h | 24
-rw-r--r--  lib/librte_eal/common/include/rte_service.h | 56
-rw-r--r--  lib/librte_eal/common/include/rte_tailq.h | 3
-rw-r--r--  lib/librte_eal/common/include/rte_uuid.h | 129
-rw-r--r--  lib/librte_eal/common/include/rte_version.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_vfio.h | 40
-rw-r--r--  lib/librte_eal/common/malloc_elem.c | 93
-rw-r--r--  lib/librte_eal/common/malloc_elem.h | 6
-rw-r--r--  lib/librte_eal/common/malloc_heap.c | 126
-rw-r--r--  lib/librte_eal/common/malloc_heap.h | 4
-rw-r--r--  lib/librte_eal/common/meson.build | 4
-rw-r--r--  lib/librte_eal/common/rte_service.c | 75
-rw-r--r--  lib/librte_eal/linuxapp/eal/Makefile | 5
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal.c | 107
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_alarm.c | 9
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 95
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_interrupts.c | 16
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_log.c | 13
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memalloc.c | 212
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c | 380
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_thread.c | 4
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.c | 128
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.h | 1
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 8
-rw-r--r--  lib/librte_eal/linuxapp/eal/meson.build | 1
-rw-r--r--  lib/librte_eal/meson.build | 5
-rw-r--r--  lib/librte_eal/rte_eal_version.map | 54
-rw-r--r--  lib/librte_ethdev/Makefile | 2
-rw-r--r--  lib/librte_ethdev/meson.build | 2
-rw-r--r--  lib/librte_ethdev/rte_ethdev.c | 569
-rw-r--r--  lib/librte_ethdev/rte_ethdev.h | 140
-rw-r--r--  lib/librte_ethdev/rte_ethdev_driver.h | 27
-rw-r--r--  lib/librte_ethdev/rte_ethdev_pci.h | 4
-rw-r--r--  lib/librte_ethdev/rte_ethdev_version.map | 8
-rw-r--r--  lib/librte_ethdev/rte_flow.c | 109
-rw-r--r--  lib/librte_ethdev/rte_flow_driver.h | 63
-rw-r--r--  lib/librte_ethdev/rte_tm.h | 4
-rw-r--r--  lib/librte_eventdev/Makefile | 9
-rw-r--r--  lib/librte_eventdev/meson.build | 9
-rw-r--r--  lib/librte_eventdev/rte_event_crypto_adapter.c | 4
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.c | 1612
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.h | 86
-rw-r--r--  lib/librte_eventdev/rte_event_ring.c | 15
-rw-r--r--  lib/librte_eventdev/rte_event_timer_adapter.c | 4
-rw-r--r--  lib/librte_eventdev/rte_eventdev.c | 15
-rw-r--r--  lib/librte_eventdev/rte_eventdev_version.map | 25
-rw-r--r--  lib/librte_flow_classify/rte_flow_classify.c | 5
-rw-r--r--  lib/librte_gso/Makefile | 1
-rw-r--r--  lib/librte_gso/gso_common.h | 3
-rw-r--r--  lib/librte_gso/gso_udp4.c | 81
-rw-r--r--  lib/librte_gso/gso_udp4.h | 42
-rw-r--r--  lib/librte_gso/meson.build | 2
-rw-r--r--  lib/librte_gso/rte_gso.c | 24
-rw-r--r--  lib/librte_gso/rte_gso.h | 6
-rw-r--r--  lib/librte_hash/meson.build | 1
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.c | 702
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.h | 22
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash_x86.h | 164
-rw-r--r--  lib/librte_hash/rte_hash.h | 86
-rw-r--r--  lib/librte_hash/rte_hash_version.map | 8
-rw-r--r--  lib/librte_kni/meson.build | 2
-rw-r--r--  lib/librte_kni/rte_kni.c | 3
-rw-r--r--  lib/librte_kvargs/Makefile | 2
-rw-r--r--  lib/librte_kvargs/meson.build | 5
-rw-r--r--  lib/librte_kvargs/rte_kvargs.c | 57
-rw-r--r--  lib/librte_kvargs/rte_kvargs.h | 58
-rw-r--r--  lib/librte_kvargs/rte_kvargs_version.map | 8
-rw-r--r--  lib/librte_latencystats/rte_latencystats.c | 8
-rw-r--r--  lib/librte_mbuf/Makefile | 1
-rw-r--r--  lib/librte_mbuf/meson.build | 1
-rw-r--r--  lib/librte_mbuf/rte_mbuf.c | 2
-rw-r--r--  lib/librte_mbuf/rte_mbuf.h | 20
-rw-r--r--  lib/librte_mbuf/rte_mbuf_pool_ops.c | 10
-rw-r--r--  lib/librte_mbuf/rte_mbuf_pool_ops.h | 13
-rw-r--r--  lib/librte_mbuf/rte_mbuf_ptype.h | 6
-rw-r--r--  lib/librte_mbuf/rte_mbuf_version.map | 4
-rw-r--r--  lib/librte_member/rte_member.c | 5
-rw-r--r--  lib/librte_mempool/Makefile | 5
-rw-r--r--  lib/librte_mempool/meson.build | 6
-rw-r--r--  lib/librte_mempool/rte_mempool.c | 210
-rw-r--r--  lib/librte_mempool/rte_mempool.h | 201
-rw-r--r--  lib/librte_mempool/rte_mempool_ops_default.c | 25
-rw-r--r--  lib/librte_mempool/rte_mempool_version.map | 6
-rw-r--r--  lib/librte_meter/rte_meter.c | 4
-rw-r--r--  lib/librte_meter/rte_meter.h | 5
-rw-r--r--  lib/librte_meter/rte_meter_version.map | 2
-rw-r--r--  lib/librte_metrics/rte_metrics.c | 15
-rw-r--r--  lib/librte_net/rte_ip.h | 28
-rw-r--r--  lib/librte_power/channel_commands.h | 3
-rw-r--r--  lib/librte_power/power_acpi_cpufreq.c | 21
-rw-r--r--  lib/librte_power/power_acpi_cpufreq.h | 16
-rw-r--r--  lib/librte_power/power_kvm_vm.c | 8
-rw-r--r--  lib/librte_power/power_kvm_vm.h | 17
-rw-r--r--  lib/librte_power/rte_power.c | 3
-rw-r--r--  lib/librte_power/rte_power.h | 32
-rw-r--r--  lib/librte_power/rte_power_version.map | 9
-rw-r--r--  lib/librte_rawdev/Makefile | 1
-rw-r--r--  lib/librte_rawdev/meson.build | 1
-rw-r--r--  lib/librte_rawdev/rte_rawdev.c | 73
-rw-r--r--  lib/librte_rawdev/rte_rawdev.h | 55
-rw-r--r--  lib/librte_rawdev/rte_rawdev_pmd.h | 28
-rw-r--r--  lib/librte_rawdev/rte_rawdev_version.map | 3
-rw-r--r--  lib/librte_ring/rte_ring.h | 7
-rw-r--r--  lib/librte_ring/rte_ring_c11_mem.h | 8
-rw-r--r--  lib/librte_security/rte_security.c | 37
-rw-r--r--  lib/librte_security/rte_security.h | 34
-rw-r--r--  lib/librte_security/rte_security_driver.h | 34
-rw-r--r--  lib/librte_vhost/iotlb.c | 10
-rw-r--r--  lib/librte_vhost/iotlb.h | 2
-rw-r--r--  lib/librte_vhost/rte_vhost.h | 8
-rw-r--r--  lib/librte_vhost/socket.c | 6
-rw-r--r--  lib/librte_vhost/vhost.c | 161
-rw-r--r--  lib/librte_vhost/vhost.h | 143
-rw-r--r--  lib/librte_vhost/vhost_crypto.c | 6
-rw-r--r--  lib/librte_vhost/vhost_user.c | 303
-rw-r--r--  lib/librte_vhost/vhost_user.h | 13
-rw-r--r--  lib/librte_vhost/virtio_net.c | 1552
-rw-r--r--  lib/meson.build | 18
178 files changed, 11110 insertions(+), 4419 deletions(-)
diff --git a/lib/Makefile b/lib/Makefile
index d82462ba..afa604e2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,7 +4,10 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += librte_compat
+DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
+DEPDIRS-librte_kvargs := librte_compat
DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
+DEPDIRS-librte_eal := librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci
DEPDIRS-librte_pci := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
@@ -76,8 +79,6 @@ DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net
DEPDIRS-librte_sched += librte_timer
-DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
-DEPDIRS-librte_kvargs := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
diff --git a/lib/librte_bbdev/rte_bbdev.c b/lib/librte_bbdev/rte_bbdev.c
index 28434e08..c4cc18d9 100644
--- a/lib/librte_bbdev/rte_bbdev.c
+++ b/lib/librte_bbdev/rte_bbdev.c
@@ -1125,9 +1125,7 @@ rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type)
return NULL;
}
-RTE_INIT(rte_bbdev_init_log);
-static void
-rte_bbdev_init_log(void)
+RTE_INIT(rte_bbdev_init_log)
{
bbdev_logtype = rte_log_register("lib.bbdev");
if (bbdev_logtype >= 0)
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index 964e3c39..c4b28f62 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -47,6 +47,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
};
int return_value;
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names));
if (return_value >= 0)
bitrate_data->id_stats_set = return_value;
@@ -65,6 +68,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
const int64_t alpha_percent = 20;
uint64_t values[6];
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
ret_code = rte_eth_stats_get(port_id, &eth_stats);
if (ret_code != 0)
return ret_code;
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
index dc6d1099..f590c8c3 100644
--- a/lib/librte_bpf/bpf.c
+++ b/lib/librte_bpf/bpf.c
@@ -53,10 +53,7 @@ bpf_jit(struct rte_bpf *bpf)
return rc;
}
-RTE_INIT(rte_bpf_init_log);
-
-static void
-rte_bpf_init_log(void)
+RTE_INIT(rte_bpf_init_log)
{
rte_bpf_logtype = rte_log_register("lib.bpf");
if (rte_bpf_logtype >= 0)
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
index 6b69de34..c10f3aec 100644
--- a/lib/librte_bpf/bpf_def.h
+++ b/lib/librte_bpf/bpf_def.h
@@ -131,6 +131,11 @@ struct ebpf_insn {
int32_t imm;
};
+/*
+ * eBPF allows functions with R1-R5 as arguments.
+ */
+#define EBPF_FUNC_MAX_ARGS (EBPF_REG_6 - EBPF_REG_1)
+
#ifdef __cplusplus
}
#endif
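Since eBPF passes function arguments in registers R1 through R5, the new macro works out to EBPF_FUNC_MAX_ARGS = EBPF_REG_6 - EBPF_REG_1 = 5. A hypothetical compile-time check, not part of the patch, that pins this down:

#include <assert.h>
#include <rte_bpf.h>	/* pulls in bpf_def.h */

static_assert(EBPF_FUNC_MAX_ARGS == 5,
	"R1..R5 give an eBPF function at most five arguments");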
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
index e373b1f3..6a79139c 100644
--- a/lib/librte_bpf/bpf_exec.c
+++ b/lib/librte_bpf/bpf_exec.c
@@ -402,7 +402,7 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
break;
/* call instructions */
case (BPF_JMP | EBPF_CALL):
- reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func(
+ reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func.val(
reg[EBPF_REG_1], reg[EBPF_REG_2],
reg[EBPF_REG_3], reg[EBPF_REG_4],
reg[EBPF_REG_5]);
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
index 5d7e65c3..b577e2cb 100644
--- a/lib/librte_bpf/bpf_impl.h
+++ b/lib/librte_bpf/bpf_impl.h
@@ -34,6 +34,20 @@ extern int rte_bpf_logtype;
#define RTE_BPF_LOG(lvl, fmt, args...) \
rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+static inline size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+ if (bpf_op_sz == BPF_B)
+ return sizeof(uint8_t);
+ else if (bpf_op_sz == BPF_H)
+ return sizeof(uint16_t);
+ else if (bpf_op_sz == BPF_W)
+ return sizeof(uint32_t);
+ else if (bpf_op_sz == EBPF_DW)
+ return sizeof(uint64_t);
+ return 0;
+}
+
#ifdef __cplusplus
}
#endif
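bpf_size() moves into this shared header (it is deleted from bpf_jit_x86.c below) so that both the JIT and the new validator can map a BPF width code to a byte count. A hedged sketch of the caller shape, mirroring how eval_load()/eval_store() in bpf_validate.c use it; the helper name is illustrative and assumes the usual bpf_impl.h/rte_common.h context:

/* Decode the access width of a load/store opcode and build the
 * matching value mask (1, 2, 4 or 8 bytes wide). */
static uint64_t
access_mask(const struct ebpf_insn *ins)
{
	size_t opsz = bpf_size(BPF_SIZE(ins->code));

	return RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
}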
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
index 111e028d..68ea389f 100644
--- a/lib/librte_bpf/bpf_jit_x86.c
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -113,20 +113,6 @@ union bpf_jit_imm {
uint8_t u8[4];
};
-static size_t
-bpf_size(uint32_t bpf_op_sz)
-{
- if (bpf_op_sz == BPF_B)
- return sizeof(uint8_t);
- else if (bpf_op_sz == BPF_H)
- return sizeof(uint16_t);
- else if (bpf_op_sz == BPF_W)
- return sizeof(uint32_t);
- else if (bpf_op_sz == EBPF_DW)
- return sizeof(uint64_t);
- return 0;
-}
-
/*
* In many cases for imm8 we can produce shorter code.
*/
@@ -1294,7 +1280,8 @@ emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
break;
/* call instructions */
case (BPF_JMP | EBPF_CALL):
- emit_call(st, (uintptr_t)bpf->prm.xsym[ins->imm].func);
+ emit_call(st,
+ (uintptr_t)bpf->prm.xsym[ins->imm].func.val);
break;
/* return instruction */
case (BPF_JMP | EBPF_EXIT):
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
index d1c9abd7..2b84fe72 100644
--- a/lib/librte_bpf/bpf_load.c
+++ b/lib/librte_bpf/bpf_load.c
@@ -51,17 +51,64 @@ bpf_load(const struct rte_bpf_prm *prm)
return bpf;
}
+/*
+ * Check that a user-provided external symbol is valid.
+ */
+static int
+bpf_check_xsym(const struct rte_bpf_xsym *xsym)
+{
+ uint32_t i;
+
+ if (xsym->name == NULL)
+ return -EINVAL;
+
+ if (xsym->type == RTE_BPF_XTYPE_VAR) {
+ if (xsym->var.desc.type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ } else if (xsym->type == RTE_BPF_XTYPE_FUNC) {
+
+ if (xsym->func.nb_args > EBPF_FUNC_MAX_ARGS)
+ return -EINVAL;
+
+ /* check function arguments */
+ for (i = 0; i != xsym->func.nb_args; i++) {
+ if (xsym->func.args[i].type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ }
+
+ /* check return value info */
+ if (xsym->func.ret.type != RTE_BPF_ARG_UNDEF &&
+ xsym->func.ret.size == 0)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
__rte_experimental struct rte_bpf *
rte_bpf_load(const struct rte_bpf_prm *prm)
{
struct rte_bpf *bpf;
int32_t rc;
+ uint32_t i;
- if (prm == NULL || prm->ins == NULL) {
+ if (prm == NULL || prm->ins == NULL ||
+ (prm->nb_xsym != 0 && prm->xsym == NULL)) {
rte_errno = EINVAL;
return NULL;
}
+ rc = 0;
+ for (i = 0; i != prm->nb_xsym && rc == 0; i++)
+ rc = bpf_check_xsym(prm->xsym + i);
+
+ if (rc != 0) {
+ rte_errno = -rc;
+ RTE_BPF_LOG(ERR, "%s: %d-th xsym is invalid\n", __func__, i);
+ return NULL;
+ }
+
bpf = bpf_load(prm);
if (bpf == NULL) {
rte_errno = ENOMEM;
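rte_bpf_load() now screens the xsym array before doing any real work. A hedged sketch (all names hypothetical) of a function symbol that satisfies the new bpf_check_xsym() rules under the reworked rte_bpf_xsym layout shown in the rte_bpf.h hunk further down:

static uint64_t
my_helper(uint64_t v, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5)
{
	(void)a2; (void)a3; (void)a4; (void)a5;
	return v + 1;
}

static const struct rte_bpf_xsym my_xsym = {
	.name = "my_helper",			/* must be non-NULL */
	.type = RTE_BPF_XTYPE_FUNC,
	.func = {
		.val = my_helper,
		.nb_args = 1,			/* <= EBPF_FUNC_MAX_ARGS */
		.args = {
			[0] = {
				.type = RTE_BPF_ARG_RAW, /* not ..._UNDEF */
				.size = sizeof(uint64_t),
			},
		},
		/* a non-UNDEF return type must carry a non-zero size */
		.ret = {.type = RTE_BPF_ARG_RAW, .size = sizeof(uint64_t)},
	},
};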
diff --git a/lib/librte_bpf/bpf_load_elf.c b/lib/librte_bpf/bpf_load_elf.c
index 6ab03d86..96d3630f 100644
--- a/lib/librte_bpf/bpf_load_elf.c
+++ b/lib/librte_bpf/bpf_load_elf.c
@@ -81,9 +81,9 @@ resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz,
ins[idx].imm = fidx;
/* for variable we need to store its absolute address */
else {
- ins[idx].imm = (uintptr_t)prm->xsym[fidx].var;
+ ins[idx].imm = (uintptr_t)prm->xsym[fidx].var.val;
ins[idx + 1].imm =
- (uint64_t)(uintptr_t)prm->xsym[fidx].var >> 32;
+ (uint64_t)(uintptr_t)prm->xsym[fidx].var.val >> 32;
}
return 0;
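For a variable xsym the loader patches the 64-bit absolute address across the two 32-bit imm fields of an LD_IMM64 instruction pair; the validator's eval_ld_imm64() (in the next file) recombines them the same way. A small sketch of the round trip, assuming the ebpf_insn layout from bpf_def.h:

/* Recombine the two immediate halves exactly as eval_ld_imm64() does. */
static uint64_t
ld_imm64_value(const struct ebpf_insn ins[2])
{
	return (uint32_t)ins[0].imm |
		(uint64_t)(uint32_t)ins[1].imm << 32;
}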
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
index b7081c85..83983efc 100644
--- a/lib/librte_bpf/bpf_validate.c
+++ b/lib/librte_bpf/bpf_validate.c
@@ -11,9 +11,28 @@
#include <rte_common.h>
#include <rte_eal.h>
+#include <rte_byteorder.h>
#include "bpf_impl.h"
+struct bpf_reg_val {
+ struct rte_bpf_arg v;
+ uint64_t mask;
+ struct {
+ int64_t min;
+ int64_t max;
+ } s;
+ struct {
+ uint64_t min;
+ uint64_t max;
+ } u;
+};
+
+struct bpf_eval_state {
+ struct bpf_reg_val rv[EBPF_REG_NUM];
+ struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+};
+
/* possible instruction node colour */
enum {
WHITE,
@@ -31,14 +50,6 @@ enum {
MAX_EDGE_TYPE
};
-struct bpf_reg_state {
- uint64_t val;
-};
-
-struct bpf_eval_state {
- struct bpf_reg_state rs[EBPF_REG_NUM];
-};
-
#define MAX_EDGES 2
struct inst_node {
@@ -54,12 +65,13 @@ struct inst_node {
struct bpf_verifier {
const struct rte_bpf_prm *prm;
struct inst_node *in;
- int32_t stack_sz;
+ uint64_t stack_sz;
uint32_t nb_nodes;
uint32_t nb_jcc_nodes;
uint32_t node_colour[MAX_NODE_COLOUR];
uint32_t edge_type[MAX_EDGE_TYPE];
struct bpf_eval_state *evst;
+ struct inst_node *evin;
struct {
uint32_t num;
uint32_t cur;
@@ -101,40 +113,823 @@ check_alu_bele(const struct ebpf_insn *ins)
}
static const char *
-eval_stack(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+eval_exit(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ RTE_SET_USED(ins);
+ if (bvf->evst->rv[EBPF_REG_0].v.type == RTE_BPF_ARG_UNDEF)
+ return "undefined return value";
+ return NULL;
+}
+
+/* set up the max possible bounds for this mask */
+static void
+eval_umax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->u.max = mask;
+ rv->u.min = 0;
+}
+
+static void
+eval_smax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->s.max = mask >> 1;
+ rv->s.min = rv->s.max ^ UINT64_MAX;
+}
+
+static void
+eval_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+ eval_smax_bound(rv, mask);
+}
+
+static void
+eval_fill_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_max_bound(rv, mask);
+ rv->v.type = RTE_BPF_ARG_RAW;
+ rv->mask = mask;
+}
+
+static void
+eval_fill_imm64(struct bpf_reg_val *rv, uint64_t mask, uint64_t val)
+{
+ rv->mask = mask;
+ rv->s.min = val;
+ rv->s.max = val;
+ rv->u.min = val;
+ rv->u.max = val;
+}
+
+static void
+eval_fill_imm(struct bpf_reg_val *rv, uint64_t mask, int32_t imm)
+{
+ uint64_t v;
+
+ v = (uint64_t)imm & mask;
+
+ rv->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rv, mask, v);
+}
+
+static const char *
+eval_ld_imm64(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- int32_t ofs;
+ uint32_t i;
+ uint64_t val;
+ struct bpf_reg_val *rd;
+
+ val = (uint32_t)ins[0].imm | (uint64_t)(uint32_t)ins[1].imm << 32;
- ofs = ins->off;
+ rd = bvf->evst->rv + ins->dst_reg;
+ rd->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rd, UINT64_MAX, val);
- if (ofs >= 0 || ofs < -MAX_BPF_STACK_SIZE)
- return "stack boundary violation";
+ for (i = 0; i != bvf->prm->nb_xsym; i++) {
+
+ /* load of external variable */
+ if (bvf->prm->xsym[i].type == RTE_BPF_XTYPE_VAR &&
+ (uintptr_t)bvf->prm->xsym[i].var.val == val) {
+ rd->v = bvf->prm->xsym[i].var.desc;
+ eval_fill_imm64(rd, UINT64_MAX, 0);
+ break;
+ }
+ }
- ofs = -ofs;
- bvf->stack_sz = RTE_MAX(bvf->stack_sz, ofs);
return NULL;
}
+static void
+eval_apply_mask(struct bpf_reg_val *rv, uint64_t mask)
+{
+ struct bpf_reg_val rt;
+
+ rt.u.min = rv->u.min & mask;
+ rt.u.max = rv->u.max & mask;
+ if (rt.u.min != rv->u.min || rt.u.max != rv->u.max) {
+ rv->u.max = RTE_MAX(rt.u.max, mask);
+ rv->u.min = 0;
+ }
+
+ eval_smax_bound(&rt, mask);
+ rv->s.max = RTE_MIN(rt.s.max, rv->s.max);
+ rv->s.min = RTE_MAX(rt.s.min, rv->s.min);
+
+ rv->mask = mask;
+}
+
+static void
+eval_add(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min + rs->u.min) & msk;
+ rv.u.max = (rd->u.min + rs->u.max) & msk;
+ rv.s.min = (rd->s.min + rs->s.min) & msk;
+ rv.s.max = (rd->s.max + rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min < rd->u.min || rv.u.max < rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min > rd->s.min) ||
+ rv.s.min < rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max > rd->s.max) ||
+ rv.s.max < rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_sub(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min - rs->u.min) & msk;
+ rv.u.max = (rd->u.min - rs->u.max) & msk;
+ rv.s.min = (rd->s.min - rs->s.min) & msk;
+ rv.s.max = (rd->s.max - rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min > rd->u.min || rv.u.max > rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min < rd->s.min) ||
+ rv.s.min > rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max < rd->s.max) ||
+ rv.s.max > rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_lsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ /* check for overflow */
+ if (rd->u.max > RTE_LEN2MASK(opsz - rs->u.max, uint64_t))
+ eval_umax_bound(rd, msk);
+ else {
+ rd->u.max <<= rs->u.max;
+ rd->u.min <<= rs->u.min;
+ }
+
+ /* check that dreg values are and would remain always positive */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0 || rd->s.max >=
+ RTE_LEN2MASK(opsz - rs->u.max - 1, int64_t))
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max <<= rs->u.max;
+ rd->s.min <<= rs->u.min;
+ }
+}
+
+static void
+eval_rsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max >>= rs->u.min;
+ rd->u.min >>= rs->u.max;
+
+ /* check that dreg values are always positive */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0)
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max >>= rs->u.min;
+ rd->s.min >>= rs->u.max;
+ }
+}
+
+static void
+eval_arsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ uint32_t shv;
+
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max = (int64_t)rd->u.max >> rs->u.min;
+ rd->u.min = (int64_t)rd->u.min >> rs->u.max;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min <<= opsz;
+ rd->s.max <<= opsz;
+ shv = opsz;
+ } else
+ shv = 0;
+
+ if (rd->s.min < 0)
+ rd->s.min = (rd->s.min >> (rs->u.min + shv)) & msk;
+ else
+ rd->s.min = (rd->s.min >> (rs->u.max + shv)) & msk;
+
+ if (rd->s.max < 0)
+ rd->s.max = (rd->s.max >> (rs->u.max + shv)) & msk;
+ else
+ rd->s.max = (rd->s.max >> (rs->u.min + shv)) & msk;
+}
+
+static uint64_t
+eval_umax_bits(uint64_t v, size_t opsz)
+{
+ if (v == 0)
+ return 0;
+
+ v = __builtin_clzll(v);
+ return RTE_LEN2MASK(opsz - v, uint64_t);
+}
+
+/* estimate max possible value for (v1 & v2) */
+static uint64_t
+eval_uand_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 & v2);
+}
+
+/* estimate max possible value for (v1 | v2) */
+static uint64_t
+eval_uor_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 | v2);
+}
+
+static void
+eval_and(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min &= rs->u.min;
+ rd->u.max &= rs->u.max;
+ } else {
+ rd->u.max = eval_uand_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min &= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min &= rs->s.min;
+ rd->s.max &= rs->s.max;
+ /* at least one of operand is non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uand_max(rd->s.max & (msk >> 1),
+ rs->s.max & (msk >> 1), opsz);
+ rd->s.min &= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_or(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min |= rs->u.min;
+ rd->u.max |= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min |= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min |= rs->s.min;
+ rd->s.max |= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min |= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_xor(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min ^= rs->u.min;
+ rd->u.max ^= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min = 0;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min ^= rs->s.min;
+ rd->s.max ^= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min = 0;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_mul(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min = (rd->u.min * rs->u.min) & msk;
+ rd->u.max = (rd->u.max * rs->u.max) & msk;
+ /* check for overflow */
+ } else if (rd->u.max <= msk >> opsz / 2 && rs->u.max <= msk >> opsz / 2) {
+ rd->u.max *= rs->u.max;
+ rd->u.min *= rd->u.min;
+ } else
+ eval_umax_bound(rd, msk);
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min = (rd->s.min * rs->s.min) & msk;
+ rd->s.max = (rd->s.max * rs->s.max) & msk;
+ /* check that both operands are positive and no overflow */
+ } else if (rd->s.min >= 0 && rs->s.min >= 0) {
+ rd->s.max *= rs->s.max;
+ rd->s.min *= rd->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
static const char *
-eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+eval_divmod(uint32_t op, struct bpf_reg_val *rd, struct bpf_reg_val *rs,
+ size_t opsz, uint64_t msk)
{
- if (ins->dst_reg == EBPF_REG_10)
- return eval_stack(bvf, ins);
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ if (rs->u.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->u.min /= rs->u.min;
+ rd->u.max /= rs->u.max;
+ } else {
+ rd->u.min %= rs->u.min;
+ rd->u.max %= rs->u.max;
+ }
+ } else {
+ if (op == BPF_MOD)
+ rd->u.max = RTE_MIN(rd->u.max, rs->u.max - 1);
+ else
+ rd->u.max = rd->u.max;
+ rd->u.min = 0;
+ }
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ rs->s.min = (int32_t)rs->s.min;
+ rs->s.max = (int32_t)rs->s.max;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ if (rs->s.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->s.min /= rs->s.min;
+ rd->s.max /= rs->s.max;
+ } else {
+ rd->s.min %= rs->s.min;
+ rd->s.max %= rs->s.max;
+ }
+ } else if (op == BPF_MOD) {
+ rd->s.min = RTE_MAX(rd->s.max, 0);
+ rd->s.min = RTE_MIN(rd->s.min, 0);
+ } else
+ eval_smax_bound(rd, msk);
+
+ rd->s.max &= msk;
+ rd->s.min &= msk;
+
return NULL;
}
+static void
+eval_neg(struct bpf_reg_val *rd, size_t opsz, uint64_t msk)
+{
+ uint64_t ux, uy;
+ int64_t sx, sy;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->u.min = (int32_t)rd->u.min;
+ rd->u.max = (int32_t)rd->u.max;
+ }
+
+ ux = -(int64_t)rd->u.min & msk;
+ uy = -(int64_t)rd->u.max & msk;
+
+ rd->u.max = RTE_MAX(ux, uy);
+ rd->u.min = RTE_MIN(ux, uy);
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ }
+
+ sx = -rd->s.min & msk;
+ sy = -rd->s.max & msk;
+
+ rd->s.max = RTE_MAX(sx, sy);
+ rd->s.min = RTE_MIN(sx, sy);
+}
+
+/*
+ * check that destination and source operand are in defined state.
+ */
+static const char *
+eval_defined(const struct bpf_reg_val *dst, const struct bpf_reg_val *src)
+{
+ if (dst != NULL && dst->v.type == RTE_BPF_ARG_UNDEF)
+ return "dest reg value is undefined";
+ if (src != NULL && src->v.type == RTE_BPF_ARG_UNDEF)
+ return "src reg value is undefined";
+ return NULL;
+}
+
+static const char *
+eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t op;
+ size_t opsz;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+
+ opsz = (BPF_CLASS(ins->code) == BPF_ALU) ?
+ sizeof(uint32_t) : sizeof(uint64_t);
+ opsz = opsz * CHAR_BIT;
+ msk = RTE_LEN2MASK(opsz, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ eval_apply_mask(rd, msk);
+
+ op = BPF_OP(ins->code);
+
+ err = eval_defined((op != EBPF_MOV) ? rd : NULL,
+ (op != BPF_NEG) ? &rs : NULL);
+ if (err != NULL)
+ return err;
+
+ if (op == BPF_ADD)
+ eval_add(rd, &rs, msk);
+ else if (op == BPF_SUB)
+ eval_sub(rd, &rs, msk);
+ else if (op == BPF_LSH)
+ eval_lsh(rd, &rs, opsz, msk);
+ else if (op == BPF_RSH)
+ eval_rsh(rd, &rs, opsz, msk);
+ else if (op == EBPF_ARSH)
+ eval_arsh(rd, &rs, opsz, msk);
+ else if (op == BPF_AND)
+ eval_and(rd, &rs, opsz, msk);
+ else if (op == BPF_OR)
+ eval_or(rd, &rs, opsz, msk);
+ else if (op == BPF_XOR)
+ eval_xor(rd, &rs, opsz, msk);
+ else if (op == BPF_MUL)
+ eval_mul(rd, &rs, opsz, msk);
+ else if (op == BPF_DIV || op == BPF_MOD)
+ err = eval_divmod(op, rd, &rs, opsz, msk);
+ else if (op == BPF_NEG)
+ eval_neg(rd, opsz, msk);
+ else if (op == EBPF_MOV)
+ *rd = rs;
+ else
+ eval_max_bound(rd, msk);
+
+ return err;
+}
+
+static const char *
+eval_bele(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd;
+ const char *err;
+
+ msk = RTE_LEN2MASK(ins->imm, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ err = eval_defined(rd, NULL);
+ if (err != NULL)
+ return err;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_BE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#else
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_LE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#endif
+
+ return NULL;
+}
+
+static const char *
+eval_ptr(struct bpf_verifier *bvf, struct bpf_reg_val *rm, uint32_t opsz,
+ uint32_t align, int16_t off)
+{
+ struct bpf_reg_val rv;
+
+ /* calculate reg + offset */
+ eval_fill_imm(&rv, rm->mask, off);
+ eval_add(rm, &rv, rm->mask);
+
+ if (RTE_BPF_ARG_PTR_TYPE(rm->v.type) == 0)
+ return "destination is not a pointer";
+
+ if (rm->mask != UINT64_MAX)
+ return "pointer truncation";
+
+ if (rm->u.max + opsz > rm->v.size ||
+ (uint64_t)rm->s.max + opsz > rm->v.size ||
+ rm->s.min < 0)
+ return "memory boundary violation";
+
+ if (rm->u.max % align != 0)
+ return "unaligned memory access";
+
+ if (rm->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "stack access with variable offset";
+
+ bvf->stack_sz = RTE_MAX(bvf->stack_sz, rm->v.size - rm->u.max);
+
+ /* pointer to mbuf */
+ } else if (rm->v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "mbuf access with variable offset";
+ }
+
+ return NULL;
+}
+
+static void
+eval_max_load(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+
+ /* full 64-bit load */
+ if (mask == UINT64_MAX)
+ eval_smax_bound(rv, mask);
+
+ /* zero-extend load */
+ rv->s.min = rv->u.min;
+ rv->s.max = rv->u.max;
+}
+
+
static const char *
eval_load(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- if (ins->src_reg == EBPF_REG_10)
- return eval_stack(bvf, ins);
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+ const struct bpf_reg_val *sv;
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+ rs = st->rv[ins->src_reg];
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ err = eval_ptr(bvf, &rs, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rs.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rs.u.max / sizeof(uint64_t);
+ if (sv->v.type == RTE_BPF_ARG_UNDEF || sv->mask < msk)
+ return "undefined value on the stack";
+
+ *rd = *sv;
+
+ /* pointer to mbuf */
+ } else if (rs.v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rs.u.max == offsetof(struct rte_mbuf, next)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v = rs.v;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, buf_addr)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v.type = RTE_BPF_ARG_PTR;
+ rd->v.size = rs.v.buf_size;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, data_off)) {
+ eval_fill_imm(rd, msk, RTE_PKTMBUF_HEADROOM);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ /* pointer to raw data */
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
return NULL;
}
static const char *
+eval_mbuf_store(const struct bpf_reg_val *rv, uint32_t opsz)
+{
+ uint32_t i;
+
+ static const struct {
+ size_t off;
+ size_t sz;
+ } mbuf_ro_fileds[] = {
+ { .off = offsetof(struct rte_mbuf, buf_addr), },
+ { .off = offsetof(struct rte_mbuf, refcnt), },
+ { .off = offsetof(struct rte_mbuf, nb_segs), },
+ { .off = offsetof(struct rte_mbuf, buf_len), },
+ { .off = offsetof(struct rte_mbuf, pool), },
+ { .off = offsetof(struct rte_mbuf, next), },
+ { .off = offsetof(struct rte_mbuf, priv_size), },
+ };
+
+ for (i = 0; i != RTE_DIM(mbuf_ro_fileds) &&
+ (mbuf_ro_fileds[i].off + mbuf_ro_fileds[i].sz <=
+ rv->u.max || rv->u.max + opsz <= mbuf_ro_fileds[i].off);
+ i++)
+ ;
+
+ if (i != RTE_DIM(mbuf_ro_fileds))
+ return "store to the read-only mbuf field";
+
+ return NULL;
+
+}
+
+static const char *
+eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val rd, rs, *sv;
+
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv[ins->dst_reg];
+
+ if (BPF_CLASS(ins->code) == BPF_STX) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ err = eval_defined(NULL, &rs);
+ if (err != NULL)
+ return err;
+
+ err = eval_ptr(bvf, &rd, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rd.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rd.u.max / sizeof(uint64_t);
+ if (BPF_CLASS(ins->code) == BPF_STX &&
+ BPF_MODE(ins->code) == EBPF_XADD)
+ eval_max_bound(sv, msk);
+ else
+ *sv = rs;
+
+ /* pointer to mbuf */
+ } else if (rd.v.type == RTE_BPF_ARG_PTR_MBUF) {
+ err = eval_mbuf_store(&rd, opsz);
+ if (err != NULL)
+ return err;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_func_arg(struct bpf_verifier *bvf, const struct rte_bpf_arg *arg,
+ struct bpf_reg_val *rv)
+{
+ uint32_t i, n;
+ struct bpf_eval_state *st;
+ const char *err;
+
+ st = bvf->evst;
+
+ if (rv->v.type == RTE_BPF_ARG_UNDEF)
+ return "Undefined argument type";
+
+ if (arg->type != rv->v.type &&
+ arg->type != RTE_BPF_ARG_RAW &&
+ (arg->type != RTE_BPF_ARG_PTR ||
+ RTE_BPF_ARG_PTR_TYPE(rv->v.type) == 0))
+ return "Invalid argument type";
+
+ err = NULL;
+
+ /* argument is a pointer */
+ if (RTE_BPF_ARG_PTR_TYPE(arg->type) != 0) {
+
+ err = eval_ptr(bvf, rv, arg->size, 1, 0);
+
+ /*
+ * pointer to the variable on the stack is passed
+ * as an argument, mark stack space it occupies as initialized.
+ */
+ if (err == NULL && rv->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ i = rv->u.max / sizeof(uint64_t);
+ n = i + arg->size / sizeof(uint64_t);
+ while (i != n) {
+ eval_fill_max_bound(st->sv + i, UINT64_MAX);
+ i++;
+ };
+ }
+ }
+
+ return err;
+}
+
+static const char *
eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- uint32_t idx;
+ uint64_t msk;
+ uint32_t i, idx;
+ struct bpf_reg_val *rv;
+ const struct rte_bpf_xsym *xsym;
+ const char *err;
idx = ins->imm;
@@ -145,6 +940,144 @@ eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
/* for now don't support function calls on 32 bit platform */
if (sizeof(uint64_t) != sizeof(uintptr_t))
return "function calls are supported only for 64 bit apps";
+
+ xsym = bvf->prm->xsym + idx;
+
+ /* evaluate function arguments */
+ err = NULL;
+ for (i = 0; i != xsym->func.nb_args && err == NULL; i++) {
+ err = eval_func_arg(bvf, xsym->func.args + i,
+ bvf->evst->rv + EBPF_REG_1 + i);
+ }
+
+ /* R1-R5 argument/scratch registers */
+ for (i = EBPF_REG_1; i != EBPF_REG_6; i++)
+ bvf->evst->rv[i].v.type = RTE_BPF_ARG_UNDEF;
+
+ /* update return value */
+
+ rv = bvf->evst->rv + EBPF_REG_0;
+ rv->v = xsym->func.ret;
+ msk = (rv->v.type == RTE_BPF_ARG_RAW) ?
+ RTE_LEN2MASK(rv->v.size * CHAR_BIT, uint64_t) : UINTPTR_MAX;
+ eval_max_bound(rv, msk);
+ rv->mask = msk;
+
+ return err;
+}
+
+static void
+eval_jeq_jne(struct bpf_reg_val *trd, struct bpf_reg_val *trs)
+{
+ /* sreg is constant */
+ if (trs->u.min == trs->u.max) {
+ trd->u = trs->u;
+ /* dreg is constant */
+ } else if (trd->u.min == trd->u.max) {
+ trs->u = trd->u;
+ } else {
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min);
+ trs->u = trd->u;
+ }
+
+ /* sreg is constant */
+ if (trs->s.min == trs->s.max) {
+ trd->s = trs->s;
+ /* dreg is constant */
+ } else if (trd->s.min == trd->s.max) {
+ trs->s = trd->s;
+ } else {
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min);
+ trs->s = trd->s;
+ }
+}
+
+static void
+eval_jgt_jle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.max = RTE_MIN(frd->u.max, frs->u.min);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min + 1);
+}
+
+static void
+eval_jlt_jge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.min = RTE_MAX(frd->u.min, frs->u.min);
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max - 1);
+}
+
+static void
+eval_jsgt_jsle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.max = RTE_MIN(frd->s.max, frs->s.min);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min + 1);
+}
+
+static void
+eval_jslt_jsge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.min = RTE_MAX(frd->s.min, frs->s.min);
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max - 1);
+}
+
+static const char *
+eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t op;
+ const char *err;
+ struct bpf_eval_state *fst, *tst;
+ struct bpf_reg_val *frd, *frs, *trd, *trs;
+ struct bpf_reg_val rvf, rvt;
+
+ tst = bvf->evst;
+ fst = bvf->evin->evst;
+
+ frd = fst->rv + ins->dst_reg;
+ trd = tst->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ frs = fst->rv + ins->src_reg;
+ trs = tst->rv + ins->src_reg;
+ } else {
+ frs = &rvf;
+ trs = &rvt;
+ eval_fill_imm(frs, UINT64_MAX, ins->imm);
+ eval_fill_imm(trs, UINT64_MAX, ins->imm);
+ }
+
+ err = eval_defined(trd, trs);
+ if (err != NULL)
+ return err;
+
+ op = BPF_OP(ins->code);
+
+ if (op == BPF_JEQ)
+ eval_jeq_jne(trd, trs);
+ else if (op == EBPF_JNE)
+ eval_jeq_jne(frd, frs);
+ else if (op == BPF_JGT)
+ eval_jgt_jle(trd, trs, frd, frs);
+ else if (op == EBPF_JLE)
+ eval_jgt_jle(frd, frs, trd, trs);
+ else if (op == EBPF_JLT)
+ eval_jlt_jge(trd, trs, frd, frs);
+ else if (op == BPF_JGE)
+ eval_jlt_jge(frd, frs, trd, trs);
+ else if (op == EBPF_JSGT)
+ eval_jsgt_jsle(trd, trs, frd, frs);
+ else if (op == EBPF_JSLE)
+ eval_jsgt_jsle(frd, frs, trd, trs);
+ else if (op == EBPF_JSLT)
+ eval_jslt_jsge(trd, trs, frd, frs);
+ else if (op == EBPF_JSGE)
+ eval_jslt_jsge(frd, frs, trd, trs);
+
return NULL;
}
@@ -157,256 +1090,306 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_SUB | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_AND | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_OR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_LSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_RSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_XOR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MUL | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_MOV | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_DIV | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MOD | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
/* ALU IMM 64-bit instructions */
[(EBPF_ALU64 | BPF_ADD | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_SUB | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_AND | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_OR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_LSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_RSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_ARSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_XOR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MUL | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_MOV | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_DIV | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MOD | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
/* ALU REG 32-bit instructions */
[(BPF_ALU | BPF_ADD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_SUB | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_AND | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_OR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_LSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_RSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_XOR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MUL | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_DIV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MOD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_MOV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_NEG)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_END | EBPF_TO_BE)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 16, .max = 64},
.check = check_alu_bele,
+ .eval = eval_bele,
},
[(BPF_ALU | EBPF_END | EBPF_TO_LE)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 16, .max = 64},
.check = check_alu_bele,
+ .eval = eval_bele,
},
/* ALU REG 64-bit instructions */
[(EBPF_ALU64 | BPF_ADD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_SUB | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_AND | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_OR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_LSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_RSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_ARSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_XOR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MUL | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_DIV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MOD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_MOV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_NEG)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
/* load instructions */
[(BPF_LDX | BPF_MEM | BPF_B)] = {
@@ -438,6 +1421,7 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_ld_imm64,
},
/* store REG instructions */
[(BPF_STX | BPF_MEM | BPF_B)] = {
@@ -513,92 +1497,110 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JNE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JSET | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
/* jcc REG instructions */
[(BPF_JMP | BPF_JEQ | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JNE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
@@ -609,16 +1611,19 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JSET | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
/* call instruction */
[(BPF_JMP | EBPF_CALL)] = {
@@ -632,6 +1637,7 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_exit,
},
};
@@ -1046,7 +2052,7 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
st = pull_eval_state(bvf);
if (st == NULL) {
RTE_BPF_LOG(ERR,
- "%s: internal error (out of space) at pc: %u",
+ "%s: internal error (out of space) at pc: %u\n",
__func__, get_node_idx(bvf, node));
return -ENOMEM;
}
@@ -1078,6 +2084,32 @@ restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
push_eval_state(bvf);
}
+static void
+log_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins,
+ uint32_t pc, int32_t loglvl)
+{
+ const struct bpf_eval_state *st;
+ const struct bpf_reg_val *rv;
+
+ rte_log(loglvl, rte_bpf_logtype, "%s(pc=%u):\n", __func__, pc);
+
+ st = bvf->evst;
+ rv = st->rv + ins->dst_reg;
+
+ rte_log(loglvl, rte_bpf_logtype,
+ "r%u={\n"
+ "\tv={type=%u, size=%zu},\n"
+ "\tmask=0x%" PRIx64 ",\n"
+ "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n"
+ "\ts={min=%" PRId64 ", max=%" PRId64 "},\n"
+ "};\n",
+ ins->dst_reg,
+ rv->v.type, rv->v.size,
+ rv->mask,
+ rv->u.min, rv->u.max,
+ rv->s.min, rv->s.max);
+}
+
/*
* Do second pass through CFG and try to evaluate instructions
* via each possible path.
@@ -1096,23 +2128,56 @@ evaluate(struct bpf_verifier *bvf)
const struct ebpf_insn *ins;
struct inst_node *next, *node;
- node = bvf->in;
+ /* initial state of frame pointer */
+ static const struct bpf_reg_val rvfp = {
+ .v = {
+ .type = RTE_BPF_ARG_PTR_STACK,
+ .size = MAX_BPF_STACK_SIZE,
+ },
+ .mask = UINT64_MAX,
+ .u = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ .s = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ };
+
+ bvf->evst->rv[EBPF_REG_1].v = bvf->prm->prog_arg;
+ bvf->evst->rv[EBPF_REG_1].mask = UINT64_MAX;
+ if (bvf->prm->prog_arg.type == RTE_BPF_ARG_RAW)
+ eval_max_bound(bvf->evst->rv + EBPF_REG_1, UINT64_MAX);
+
+ bvf->evst->rv[EBPF_REG_10] = rvfp;
+
ins = bvf->prm->ins;
+ node = bvf->in;
+ next = node;
rc = 0;
while (node != NULL && rc == 0) {
- /* current node evaluation */
- idx = get_node_idx(bvf, node);
- op = ins[idx].code;
+ /*
+ * current node evaluation, make sure we evaluate
+ * each node only once.
+ */
+ if (next != NULL) {
+
+ bvf->evin = node;
+ idx = get_node_idx(bvf, node);
+ op = ins[idx].code;
- if (ins_chk[op].eval != NULL) {
- err = ins_chk[op].eval(bvf, ins + idx);
- if (err != NULL) {
- RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
- __func__, err, idx);
- rc = -EINVAL;
+ /* for a jcc node, make a copy of the evaluation state */
+ if (node->nb_edge > 1)
+ rc |= save_eval_state(bvf, node);
+
+ if (ins_chk[op].eval != NULL && rc == 0) {
+ err = ins_chk[op].eval(bvf, ins + idx);
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, idx);
+ rc = -EINVAL;
+ }
}
+
+ log_eval_state(bvf, ins + idx, idx, RTE_LOG_DEBUG);
+ bvf->evin = NULL;
}
/* proceed through CFG */
@@ -1120,9 +2185,8 @@ evaluate(struct bpf_verifier *bvf)
if (next != NULL) {
/* proceed with next child */
- if (node->cur_edge != node->nb_edge)
- rc |= save_eval_state(bvf, node);
- else if (node->evst != NULL)
+ if (node->cur_edge == node->nb_edge &&
+ node->evst != NULL)
restore_eval_state(bvf, node);
next->prev_node = get_node_idx(bvf, node);
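A minimal, self-contained sketch (not DPDK code) of the traversal policy the hunk above implements: the evaluation state is snapshotted only at nodes with more than one out-edge, and restored once the last out-edge has been walked. The node layout and the "state" payload are invented for illustration.

#include <stdio.h>

struct toy_state { int regs[4]; };

struct toy_node {
	int nb_edge;            /* number of out-edges */
	int cur_edge;           /* next out-edge to follow */
	struct toy_state saved; /* snapshot taken before first edge */
	int has_saved;
};

static void
enter_node(struct toy_node *n, const struct toy_state *cur)
{
	/* branch node: copy the evaluation state before descending */
	if (n->nb_edge > 1 && !n->has_saved) {
		n->saved = *cur;
		n->has_saved = 1;
	}
}

static void
leave_node(const struct toy_node *n, struct toy_state *cur)
{
	/* all out-edges walked: roll back to the snapshot */
	if (n->cur_edge == n->nb_edge && n->has_saved)
		*cur = n->saved;
}

int
main(void)
{
	struct toy_state st = { .regs = {1, 2, 3, 4} };
	struct toy_node branch = { .nb_edge = 2 };

	enter_node(&branch, &st);
	st.regs[0] = 99;          /* mutate state along the first edge */
	branch.cur_edge = 2;      /* pretend both edges were walked */
	leave_node(&branch, &st);
	printf("r0 after restore: %d\n", st.regs[0]); /* prints 1 */
	return 0;
}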
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
index de9de009..bc0cd78f 100644
--- a/lib/librte_bpf/meson.build
+++ b/lib/librte_bpf/meson.build
@@ -8,7 +8,7 @@ sources = files('bpf.c',
'bpf_pkt.c',
'bpf_validate.c')
-if arch_subdir == 'x86'
+if arch_subdir == 'x86' and cc.sizeof('void *') == 8
sources += files('bpf_jit_x86.c')
endif
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
index 1249a992..ad62ef2c 100644
--- a/lib/librte_bpf/rte_bpf.h
+++ b/lib/librte_bpf/rte_bpf.h
@@ -40,7 +40,11 @@ enum rte_bpf_arg_type {
*/
struct rte_bpf_arg {
enum rte_bpf_arg_type type;
- size_t size; /**< for pointer types, size of data it points to */
+ /**
+ * for ptr type - max size of data buffer it points to
+ * for raw type - the size (in bytes) of the value
+ */
+ size_t size;
size_t buf_size;
/**< for mbuf ptr type, max size of rte_mbuf data buffer */
};
@@ -66,10 +70,19 @@ struct rte_bpf_xsym {
const char *name; /**< name */
enum rte_bpf_xtype type; /**< type */
union {
- uint64_t (*func)(uint64_t, uint64_t, uint64_t,
+ struct {
+ uint64_t (*val)(uint64_t, uint64_t, uint64_t,
uint64_t, uint64_t);
- void *var;
- }; /**< value */
+ uint32_t nb_args;
+ struct rte_bpf_arg args[EBPF_FUNC_MAX_ARGS];
+ /**< Function argument descriptions. */
+ struct rte_bpf_arg ret; /**< function return value. */
+ } func;
+ struct {
+ void *val; /**< actual memory location */
+ struct rte_bpf_arg desc; /**< type, size, etc. */
+ } var; /**< external variable */
+ };
};
/**
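With the reworked struct rte_bpf_xsym above, an external function symbol now carries its own argument and return-value descriptions inside the .func sub-struct. A hedged sketch of registering one; the callback name and its argument layout are invented for illustration:

#include <rte_bpf.h>

static uint64_t
my_filter_cb(uint64_t arg1, uint64_t arg2, uint64_t arg3,
	uint64_t arg4, uint64_t arg5)
{
	(void)arg2; (void)arg3; (void)arg4; (void)arg5;
	return arg1 + 1;
}

static const struct rte_bpf_xsym xsym = {
	.name = "my_filter_cb",
	.type = RTE_BPF_XTYPE_FUNC,
	.func = {
		.val = my_filter_cb,
		.nb_args = 1,
		.args = {
			[0] = {
				.type = RTE_BPF_ARG_RAW,
				.size = sizeof(uint64_t),
			},
		},
		.ret = {
			.type = RTE_BPF_ARG_RAW,
			.size = sizeof(uint64_t),
		},
	},
};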
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index 961f9bef..9666e90c 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -208,9 +208,6 @@ cmdline_parse(struct cmdline *cl, const char * buf)
int err = CMDLINE_PARSE_NOMATCH;
int tok;
cmdline_parse_ctx_t *ctx;
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- char debug_buf[BUFSIZ];
-#endif
char *result_buf = result.buf;
if (!cl || !buf)
@@ -250,10 +247,8 @@ cmdline_parse(struct cmdline *cl, const char * buf)
return linelen;
}
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- strlcpy(debug_buf, buf, (linelen > 64 ? 64 : linelen));
- debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf);
-#endif
+ debug_printf("Parse line : len=%d, <%.*s>\n",
+ linelen, linelen > 64 ? 64 : linelen, buf);
/* parse it !! */
inst = ctx[inst_num];
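The replacement above relies on printf's "%.*s" precision specifier to bound how many bytes are printed, removing the need for the on-stack copy. A standalone illustration with invented input:

#include <stdio.h>

int
main(void)
{
	const char *buf = "show port stats all\n";
	int linelen = 20;

	/* print at most 64 bytes of buf, or linelen if smaller */
	printf("Parse line : len=%d, <%.*s>\n",
		linelen, linelen > 64 ? 64 : linelen, buf);
	return 0;
}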
diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile
index 0c57533c..61089fe7 100644
--- a/lib/librte_compat/Makefile
+++ b/lib/librte_compat/Makefile
@@ -1,33 +1,6 @@
-# BSD LICENSE
-#
-# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
+# All rights reserved.
include $(RTE_SDK)/mk/rte.vars.mk
diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c
index d596ba87..98ad0cfd 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -14,8 +14,12 @@ rte_comp_get_feature_name(uint64_t flag)
return "STATEFUL_COMPRESSION";
case RTE_COMP_FF_STATEFUL_DECOMPRESSION:
return "STATEFUL_DECOMPRESSION";
- case RTE_COMP_FF_MBUF_SCATTER_GATHER:
- return "MBUF_SCATTER_GATHER";
+ case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_COMP_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_COMP_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
case RTE_COMP_FF_MULTI_PKT_CHECKSUM:
return "MULTI_PKT_CHECKSUM";
case RTE_COMP_FF_ADLER32_CHECKSUM:
@@ -32,6 +36,10 @@ rte_comp_get_feature_name(uint64_t flag)
return "SHA2_SHA256_HASH";
case RTE_COMP_FF_SHAREABLE_PRIV_XFORM:
return "SHAREABLE_PRIV_XFORM";
+ case RTE_COMP_FF_HUFFMAN_FIXED:
+ return "HUFFMAN_FIXED";
+ case RTE_COMP_FF_HUFFMAN_DYNAMIC:
+ return "HUFFMAN_DYNAMIC";
default:
return NULL;
}
diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h
index 5b513c77..ee9056ea 100644
--- a/lib/librte_compressdev/rte_comp.h
+++ b/lib/librte_compressdev/rte_comp.h
@@ -30,28 +30,43 @@ extern "C" {
/**< Stateful compression is supported */
#define RTE_COMP_FF_STATEFUL_DECOMPRESSION (1ULL << 1)
/**< Stateful decompression is supported */
-#define RTE_COMP_FF_MBUF_SCATTER_GATHER (1ULL << 2)
-/**< Scatter-gather mbufs are supported */
-#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 3)
+#define RTE_COMP_FF_OOP_SGL_IN_SGL_OUT (1ULL << 2)
+/**< Out-of-place Scatter-gather (SGL) buffers,
+ * with multiple segments, are supported in input and output
+ */
+#define RTE_COMP_FF_OOP_SGL_IN_LB_OUT (1ULL << 3)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment, in output
+ */
+#define RTE_COMP_FF_OOP_LB_IN_SGL_OUT (1ULL << 4)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 5)
/**< Adler-32 Checksum is supported */
-#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 4)
+#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 6)
/**< CRC32 Checksum is supported */
-#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 5)
+#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 7)
/**< Adler-32/CRC32 Checksum is supported */
-#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 6)
+#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 8)
/**< Generation of checksum across multiple stateless packets is supported */
-#define RTE_COMP_FF_SHA1_HASH (1ULL << 7)
+#define RTE_COMP_FF_SHA1_HASH (1ULL << 9)
/**< SHA1 Hash is supported */
-#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 8)
+#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 10)
/**< SHA256 Hash of SHA2 family is supported */
-#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 9)
+#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 11)
/**< Creation of non-compressed blocks using RTE_COMP_LEVEL_NONE is supported */
-#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 10)
+#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 12)
/**< Private xforms created by the PMD can be shared
* across multiple stateless operations. If not set, then app needs
* to create as many priv_xforms as it expects to have stateless
* operations in-flight.
*/
+#define RTE_COMP_FF_HUFFMAN_FIXED (1ULL << 13)
+/**< Fixed Huffman encoding is supported */
+#define RTE_COMP_FF_HUFFMAN_DYNAMIC (1ULL << 14)
+/**< Dynamic Huffman encoding is supported */
/** Status of comp operation */
enum rte_comp_op_status {
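With RTE_COMP_FF_MBUF_SCATTER_GATHER split into the three out-of-place variants above, an application must test the exact in/out buffer combination it intends to use. A hedged sketch (the device id is assumed valid):

#include <stdio.h>
#include <rte_compressdev.h>
#include <rte_comp.h>

static int
supports_sgl_in_linear_out(uint8_t dev_id)
{
	struct rte_compressdev_info info;

	rte_compressdev_info_get(dev_id, &info);
	if ((info.feature_flags & RTE_COMP_FF_OOP_SGL_IN_LB_OUT) == 0) {
		/* rte_comp_get_feature_name() maps the flag to its name */
		printf("%s not supported\n",
			rte_comp_get_feature_name(RTE_COMP_FF_OOP_SGL_IN_LB_OUT));
		return 0;
	}
	return 1;
}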
diff --git a/lib/librte_compressdev/rte_compressdev.c b/lib/librte_compressdev/rte_compressdev.c
index 6a38917d..9091dd6e 100644
--- a/lib/librte_compressdev/rte_compressdev.c
+++ b/lib/librte_compressdev/rte_compressdev.c
@@ -764,10 +764,7 @@ rte_compressdev_name_get(uint8_t dev_id)
return dev->data->name;
}
-RTE_INIT(rte_compressdev_log);
-
-static void
-rte_compressdev_log(void)
+RTE_INIT(rte_compressdev_log)
{
compressdev_logtype = rte_log_register("lib.compressdev");
if (compressdev_logtype >= 0)
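The change above adopts the newer RTE_INIT() form, which declares and defines the constructor in one step instead of a separate prototype plus static function. The same pattern in application code; the logtype name is illustrative:

#include <rte_log.h>

static int my_logtype;

RTE_INIT(my_app_log_init)
{
	my_logtype = rte_log_register("app.mycomponent");
	if (my_logtype >= 0)
		rte_log_set_level(my_logtype, RTE_LOG_NOTICE);
}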
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index bba8dee9..c1148887 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -23,6 +23,7 @@ SYMLINK-y-include += rte_crypto.h
SYMLINK-y-include += rte_crypto_sym.h
SYMLINK-y-include += rte_cryptodev.h
SYMLINK-y-include += rte_cryptodev_pmd.h
+SYMLINK-y-include += rte_crypto_asym.h
# versioning export map
EXPORT_MAP := rte_cryptodev_version.map
diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build
index bd5fed89..295f509e 100644
--- a/lib/librte_cryptodev/meson.build
+++ b/lib/librte_cryptodev/meson.build
@@ -6,5 +6,6 @@ sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c')
headers = files('rte_cryptodev.h',
'rte_cryptodev_pmd.h',
'rte_crypto.h',
- 'rte_crypto_sym.h')
+ 'rte_crypto_sym.h',
+ 'rte_crypto_asym.h')
deps += ['kvargs', 'mbuf']
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 25404264..fd5ef3a8 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -23,6 +23,7 @@ extern "C" {
#include <rte_common.h>
#include "rte_crypto_sym.h"
+#include "rte_crypto_asym.h"
/** Crypto operation types */
enum rte_crypto_op_type {
@@ -30,6 +31,8 @@ enum rte_crypto_op_type {
/**< Undefined operation type */
RTE_CRYPTO_OP_TYPE_SYMMETRIC,
/**< Symmetric operation */
+ RTE_CRYPTO_OP_TYPE_ASYMMETRIC
+ /**< Asymmetric operation */
};
/** Status of crypto operation */
@@ -73,26 +76,37 @@ enum rte_crypto_op_sess_type {
* rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() .
*/
struct rte_crypto_op {
- uint8_t type;
- /**< operation type */
- uint8_t status;
- /**<
- * operation status - this is reset to
- * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and
- * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
- * is successfully processed by a crypto PMD
- */
- uint8_t sess_type;
- /**< operation session type */
- uint16_t private_data_offset;
- /**< Offset to indicate start of private data (if any). The offset
- * is counted from the start of the rte_crypto_op including IV.
- * The private data may be used by the application to store
- * information which should remain untouched in the library/driver
- */
-
- uint8_t reserved[3];
- /**< Reserved bytes to fill 64 bits for future additions */
+ __extension__
+ union {
+ uint64_t raw;
+ __extension__
+ struct {
+ uint8_t type;
+ /**< operation type */
+ uint8_t status;
+ /**<
+ * operation status - this is reset to
+ * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation
+ * from mempool and will be set to
+ * RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
+ * is successfully processed by a crypto PMD
+ */
+ uint8_t sess_type;
+ /**< operation session type */
+ uint8_t reserved[3];
+ /**< Reserved bytes to fill 64 bits for
+ * future additions
+ */
+ uint16_t private_data_offset;
+ /**< Offset to indicate start of private data (if any).
+ * The offset is counted from the start of the
+ * rte_crypto_op including IV.
+ * The private data may be used by the application
+ * to store information which should remain untouched
+ * in the library/driver
+ */
+ };
+ };
struct rte_mempool *mempool;
/**< crypto operation mempool which operation is allocated from */
@@ -103,6 +117,10 @@ struct rte_crypto_op {
union {
struct rte_crypto_sym_op sym[0];
/**< Symmetric operation parameters */
+
+ struct rte_crypto_asym_op asym[0];
+ /**< Asymmetric operation parameters */
+
}; /**< operation specific parameters */
};
@@ -123,6 +141,9 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
__rte_crypto_sym_op_reset(op->sym);
break;
+ case RTE_CRYPTO_OP_TYPE_ASYMMETRIC:
+ memset(op->asym, 0, sizeof(struct rte_crypto_asym_op));
+ break;
case RTE_CRYPTO_OP_TYPE_UNDEFINED:
default:
break;
@@ -289,9 +310,14 @@ __rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size)
if (likely(op->mempool != NULL)) {
priv_size = __rte_crypto_op_get_priv_data_size(op->mempool);
- if (likely(priv_size >= size))
- return (void *)((uint8_t *)(op + 1) +
+ if (likely(priv_size >= size)) {
+ if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
sizeof(struct rte_crypto_sym_op));
+ if (op->type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
+ sizeof(struct rte_crypto_asym_op));
+ }
}
return NULL;
@@ -394,6 +420,24 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op,
return __rte_crypto_sym_op_attach_sym_session(op->sym, sess);
}
+/**
+ * Attach an asymmetric session to a crypto operation
+ *
+ * @param op crypto operation, must be of type asymmetric
+ * @param sess cryptodev session
+ */
+static inline int
+rte_crypto_op_attach_asym_session(struct rte_crypto_op *op,
+ struct rte_cryptodev_asym_session *sess)
+{
+ if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_ASYMMETRIC))
+ return -1;
+
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+ op->asym->session = sess;
+ return 0;
+}
+
#ifdef __cplusplus
}
#endif
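A hedged usage sketch for the new asymmetric op type: allocate an op of type ASYMMETRIC and attach an asymmetric session to it with the helper added above. The pool and session are assumed to have been created beforehand (see rte_crypto_op_pool_create() and rte_cryptodev_asym_session_create()):

#include <errno.h>
#include <rte_crypto.h>

static int
prepare_asym_op(struct rte_mempool *asym_op_pool,
	struct rte_cryptodev_asym_session *asym_sess,
	struct rte_crypto_op **opp)
{
	struct rte_crypto_op *op;

	op = rte_crypto_op_alloc(asym_op_pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
	if (op == NULL)
		return -ENOMEM;

	if (rte_crypto_op_attach_asym_session(op, asym_sess) != 0) {
		rte_crypto_op_free(op);
		return -EINVAL;
	}

	*opp = op;
	return 0;
}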
diff --git a/lib/librte_cryptodev/rte_crypto_asym.h b/lib/librte_cryptodev/rte_crypto_asym.h
new file mode 100644
index 00000000..5e185b2d
--- /dev/null
+++ b/lib/librte_cryptodev/rte_crypto_asym.h
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium Networks
+ */
+
+#ifndef _RTE_CRYPTO_ASYM_H_
+#define _RTE_CRYPTO_ASYM_H_
+
+/**
+ * @file rte_crypto_asym.h
+ *
+ * RTE Definitions for Asymmetric Cryptography
+ *
+ * Defines asymmetric algorithms and modes, as well as supported
+ * asymmetric crypto operations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <stdint.h>
+
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+
+typedef struct rte_crypto_param_t {
+ uint8_t *data;
+ /**< pointer to buffer holding data */
+ rte_iova_t iova;
+ /**< IO address of data buffer */
+ size_t length;
+ /**< length of data in bytes */
+} rte_crypto_param;
+
+/** asym xform type name strings */
+extern const char *
+rte_crypto_asym_xform_strings[];
+
+/** asym operations type name strings */
+extern const char *
+rte_crypto_asym_op_strings[];
+
+/**
+ * Asymmetric crypto transformation types.
+ * Each xform type maps to one asymmetric algorithm
+ * performing a specific operation
+ *
+ */
+enum rte_crypto_asym_xform_type {
+ RTE_CRYPTO_ASYM_XFORM_UNSPECIFIED = 0,
+ /**< Invalid xform. */
+ RTE_CRYPTO_ASYM_XFORM_NONE,
+ /**< Xform type None.
+ * May be supported by a PMD to provide a
+ * passthrough op for debugging purposes.
+ * If xform_type is none, op_type is disregarded.
+ */
+ RTE_CRYPTO_ASYM_XFORM_RSA,
+ /**< RSA. Performs Encrypt, Decrypt, Sign and Verify.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DH,
+ /**< Diffie-Hellman.
+ * Performs Key Generate and Shared Secret Compute.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DSA,
+ /**< Digital Signature Algorithm
+ * Performs Signature Generation and Verification.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODINV,
+ /**< Modular Inverse
+ * Performs modular inverse b^(-1) mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODEX,
+ /**< Modular Exponentiation
+ * Performs modular exponentiation b^e mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END
+ /**< End of list */
+};
+
+/**
+ * Asymmetric crypto operation type variants
+ */
+enum rte_crypto_asym_op_type {
+ RTE_CRYPTO_ASYM_OP_ENCRYPT,
+ /**< Asymmetric Encrypt operation */
+ RTE_CRYPTO_ASYM_OP_DECRYPT,
+ /**< Asymmetric Decrypt operation */
+ RTE_CRYPTO_ASYM_OP_SIGN,
+ /**< Signature Generation operation */
+ RTE_CRYPTO_ASYM_OP_VERIFY,
+ /**< Signature Verification operation */
+ RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
+ /**< DH Private Key generation operation */
+ RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
+ /**< DH Public Key generation operation */
+ RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
+ /**< DH Shared Secret compute operation */
+ RTE_CRYPTO_ASYM_OP_LIST_END
+};
+
+/**
+ * Padding types for RSA signature.
+ */
+enum rte_crypto_rsa_padding_type {
+ RTE_CRYPTO_RSA_PADDING_NONE = 0,
+ /**< RSA no padding scheme */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT0,
+ /**< RSA PKCS#1 V1.5 Block Type 0 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT1,
+ /**< RSA PKCS#1 V1.5 Block Type 01 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT2,
+ /**< RSA PKCS#1 V1.5 Block Type 02 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PADDING_OAEP,
+ /**< RSA PKCS#1 OAEP padding scheme */
+ RTE_CRYPTO_RSA_PADDING_PSS,
+ /**< RSA PKCS#1 PSS padding scheme */
+ RTE_CRYPTO_RSA_PADDING_TYPE_LIST_END
+};
+
+/**
+ * RSA private key type enumeration
+ *
+ * enumerates private key format required to perform RSA crypto
+ * transform.
+ *
+ */
+enum rte_crypto_rsa_priv_key_type {
+ RTE_RSA_KEY_TYPE_EXP,
+ /**< RSA private key is an exponent */
+ RTE_RSA_KET_TYPE_QT,
+ /**< RSA private key is in quintuple format
+ * See rte_crypto_rsa_priv_key_qt
+ */
+};
+
+/**
+ * Structure describing RSA private key in quintuple format.
+ * See PKCS V1.5 RSA Cryptography Standard.
+ */
+struct rte_crypto_rsa_priv_key_qt {
+ rte_crypto_param p;
+ /**< p - Private key component P
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param q;
+ /**< q - Private key component Q
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param dP;
+ /**< dP - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dP = d mod ( p - 1 )
+ */
+
+ rte_crypto_param dQ;
+ /**< dQ - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dQ = d mod ( q - 1 )
+ */
+
+ rte_crypto_param qInv;
+ /**< qInv - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * qInv = inv q mod p
+ */
+};
+
+/**
+ * Asymmetric RSA transform data
+ *
+ * Structure describing RSA xform params
+ *
+ */
+struct rte_crypto_rsa_xform {
+ rte_crypto_param n;
+ /**< n - Modulus
+ * Modulus data of RSA operation in Octet-string network
+ * byte order format.
+ */
+
+ rte_crypto_param e;
+ /**< e - Public key exponent
+ * Public key exponent used for RSA public key operations in Octet-
+ * string network byte order format.
+ */
+
+ enum rte_crypto_rsa_priv_key_type key_type;
+
+ __extension__
+ union {
+ rte_crypto_param d;
+ /**< d - Private key exponent
+ * Private key exponent used for RSA
+ * private key operations in
+ * Octet-string network byte order format.
+ */
+
+ struct rte_crypto_rsa_priv_key_qt qt;
+ /**< qt - Private key in quintuple format */
+ };
+};
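A hedged sketch of filling struct rte_crypto_rsa_xform for a private key held as a plain exponent (RTE_RSA_KEY_TYPE_EXP). The byte arrays are placeholders, not a real key, and the iova fields are left unset here; a PMD may require them to be filled:

#include <rte_crypto_asym.h>

static uint8_t rsa_n[256];	/* modulus, network byte order (placeholder) */
static uint8_t rsa_e[3];	/* public exponent (placeholder) */
static uint8_t rsa_d[256];	/* private exponent (placeholder) */

static const struct rte_crypto_rsa_xform rsa = {
	.n = { .data = rsa_n, .length = sizeof(rsa_n) },
	.e = { .data = rsa_e, .length = sizeof(rsa_e) },
	.key_type = RTE_RSA_KEY_TYPE_EXP,
	.d = { .data = rsa_d, .length = sizeof(rsa_d) },
};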
+
+/**
+ * Asymmetric Modular exponentiation transform data
+ *
+ * Structure describing modular exponentiation xform params
+ *
+ */
+struct rte_crypto_modex_xform {
+ rte_crypto_param modulus;
+ /**< modulus
+ * Prime modulus of the modexp transform operation in octet-string
+ * network byte order format.
+ */
+
+ rte_crypto_param exponent;
+ /**< exponent
+ * Private exponent of the modexp transform operation in
+ * octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric modular inverse transform operation
+ *
+ * Structure describing modulus inverse xform params
+ *
+ */
+struct rte_crypto_modinv_xform {
+ rte_crypto_param modulus;
+ /**<
+ * Pointer to the prime modulus data for modular
+ * inverse operation in octet-string network byte
+ * order format.
+ */
+};
+
+/**
+ * Asymmetric DH transform data
+ *
+ * Structure describing Diffie-Hellman xform params
+ *
+ */
+struct rte_crypto_dh_xform {
+ enum rte_crypto_asym_op_type type;
+ /**< Setup xform for key generate or shared secret compute */
+
+ rte_crypto_param p;
+ /**< p : Prime modulus data
+ * DH prime modulus data in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param g;
+ /**< g : Generator
+ * DH group generator data in octet-string network byte order
+ * format.
+ *
+ */
+};
+
+/**
+ * Asymmetric Digital Signature transform operation
+ *
+ * Structure describing DSA xform params
+ *
+ */
+struct rte_crypto_dsa_xform {
+ rte_crypto_param p;
+ /**< p - Prime modulus
+ * Prime modulus data for DSA operation in Octet-string network byte
+ * order format.
+ */
+ rte_crypto_param q;
+ /**< q : Order of the subgroup.
+ * Order of the subgroup data in Octet-string network byte order
+ * format.
+ * (p-1) % q = 0
+ */
+ rte_crypto_param g;
+ /**< g: Generator of the subgroup
+ * Generator data in Octet-string network byte order format.
+ */
+ rte_crypto_param x;
+ /**< x: Private key of the signer in octet-string network
+ * byte order format.
+ * Used when app has pre-defined private key.
+ * Valid only when xform chain is DSA ONLY.
+ * If xform chain is DH private key generate + DSA, then DSA sign
+ * compute will use internally generated key.
+ */
+};
+
+/**
+ * Operations params for modular operations:
+ * exponentiation and invert
+ *
+ */
+struct rte_crypto_mod_op_param {
+ rte_crypto_param base;
+ /**<
+ * Pointer to base of modular exponentiation/inversion data in
+ * Octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric crypto transform data
+ *
+ * Structure describing asym xforms.
+ */
+struct rte_crypto_asym_xform {
+ struct rte_crypto_asym_xform *next;
+ /**< Pointer to next xform to set up xform chain.*/
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Asymmetric crypto transform */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_xform rsa;
+ /**< RSA xform parameters */
+
+ struct rte_crypto_modex_xform modex;
+ /**< Modular Exponentiation xform parameters */
+
+ struct rte_crypto_modinv_xform modinv;
+ /**< Modulus Inverse xform parameters */
+
+ struct rte_crypto_dh_xform dh;
+ /**< DH xform parameters */
+
+ struct rte_crypto_dsa_xform dsa;
+ /**< DSA xform parameters */
+ };
+};
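A hedged example of a complete modular-exponentiation transform, combining the generic rte_crypto_asym_xform wrapper above with the modex-specific fields. Modulus and exponent bytes are placeholders:

#include <rte_crypto_asym.h>

static uint8_t mod_n[128];	/* modulus (placeholder) */
static uint8_t mod_e[4];	/* exponent (placeholder) */

static const struct rte_crypto_asym_xform modex_xform = {
	.next = NULL,
	.xform_type = RTE_CRYPTO_ASYM_XFORM_MODEX,
	.modex = {
		.modulus  = { .data = mod_n, .length = sizeof(mod_n) },
		.exponent = { .data = mod_e, .length = sizeof(mod_e) },
	},
};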
+
+struct rte_cryptodev_asym_session;
+
+/**
+ * RSA operation params
+ *
+ */
+struct rte_crypto_rsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+ /**< Type of RSA operation for transform */
+
+ rte_crypto_param message;
+ /**<
+ * Pointer to data
+ * - to be encrypted for RSA public encrypt.
+ * - to be decrypted for RSA private decrypt.
+ * - to be signed for RSA sign generation.
+ * - to be authenticated for RSA sign verification.
+ */
+
+ rte_crypto_param sign;
+ /**<
+ * Pointer to RSA signature data. If operation is RSA
+ * sign @ref RTE_CRYPTO_ASYM_OP_SIGN, buffer will be
+ * over-written with generated signature.
+ *
+ * Length of the signature data will be equal to the
+ * RSA prime modulus length.
+ */
+
+ enum rte_crypto_rsa_padding_type pad;
+ /**< RSA padding scheme to be used for transform */
+
+ enum rte_crypto_auth_algorithm md;
+ /**< Hash algorithm to be used for data hash if padding
+ * scheme is either OAEP or PSS. Valid hash algorithms
+ * are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+
+ enum rte_crypto_auth_algorithm mgf1md;
+ /**<
+ * Hash algorithm to be used for mask generation if
+ * padding scheme is either OAEP or PSS. If padding
+ * scheme is unspecified data hash algorithm is used
+ * for mask generation. Valid hash algorithms are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+};
+
+/**
+ * Diffie-Hellman Operations params.
+ */
+struct rte_crypto_dh_op_param {
+ rte_crypto_param pub_key;
+ /**<
+ * Output generated public key when xform type is
+ * DH PUB_KEY_GENERATION.
+ * Input peer public key when xform type is DH
+ * SHARED_SECRET_COMPUTATION.
+ * pub_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param priv_key;
+ /**<
+ * Output generated private key if xform type is
+ * DH PRIVATE_KEY_GENERATION
+ * Input when xform type is DH SHARED_SECRET_COMPUTATION.
+ * priv_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param shared_secret;
+ /**<
+ * Output with calculated shared secret
+ * when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+ * shared_secret is in octet-string network byte order format.
+ *
+ */
+};
+
+/**
+ * DSA Operations params
+ *
+ */
+struct rte_crypto_dsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+ /**< Signature Generation or Verification */
+ rte_crypto_param message;
+ /**< input message to be signed or verified */
+ rte_crypto_param r;
+ /**< dsa sign component 'r' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param s;
+ /**< dsa sign component 's' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param y;
+ /**< y : Public key of the signer.
+ * Public key data of the signer in Octet-string network byte order
+ * format.
+ * y = g^x mod p
+ */
+};
+
+/**
+ * Asymmetric Cryptographic Operation.
+ *
+ * Structure describing asymmetric crypto operation params.
+ *
+ */
+struct rte_crypto_asym_op {
+ struct rte_cryptodev_asym_session *session;
+ /**< Handle for the initialised session context */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_op_param rsa;
+ struct rte_crypto_mod_op_param modex;
+ struct rte_crypto_mod_op_param modinv;
+ struct rte_crypto_dh_op_param dh;
+ struct rte_crypto_dsa_op_param dsa;
+ };
+} __rte_cache_aligned;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTO_ASYM_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index 7e582124..63ae23f0 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -166,6 +166,31 @@ rte_crypto_aead_operation_strings[] = {
[RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt"
};
+/**
+ * Asymmetric crypto transform operation strings identifiers.
+ */
+const char *rte_crypto_asym_xform_strings[] = {
+ [RTE_CRYPTO_ASYM_XFORM_NONE] = "none",
+ [RTE_CRYPTO_ASYM_XFORM_RSA] = "rsa",
+ [RTE_CRYPTO_ASYM_XFORM_MODEX] = "modexp",
+ [RTE_CRYPTO_ASYM_XFORM_MODINV] = "modinv",
+ [RTE_CRYPTO_ASYM_XFORM_DH] = "dh",
+ [RTE_CRYPTO_ASYM_XFORM_DSA] = "dsa",
+};
+
+/**
+ * Asymmetric crypto operation strings identifiers.
+ */
+const char *rte_crypto_asym_op_strings[] = {
+ [RTE_CRYPTO_ASYM_OP_ENCRYPT] = "encrypt",
+ [RTE_CRYPTO_ASYM_OP_DECRYPT] = "decrypt",
+ [RTE_CRYPTO_ASYM_OP_SIGN] = "sign",
+ [RTE_CRYPTO_ASYM_OP_VERIFY] = "verify",
+ [RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE] = "priv_key_generate",
+ [RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE] = "pub_key_generate",
+ [RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE] = "sharedsecret_compute",
+};
+
int
rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum,
const char *algo_string)
@@ -217,6 +242,24 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
return -1;
}
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string)
+{
+ unsigned int i;
+
+ for (i = 1; i < RTE_DIM(rte_crypto_asym_xform_strings); i++) {
+ if (strcmp(xform_string,
+ rte_crypto_asym_xform_strings[i]) == 0) {
+ *xform_enum = (enum rte_crypto_asym_xform_type) i;
+ return 0;
+ }
+ }
+
+ /* Invalid string */
+ return -1;
+}
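Illustrative use of the new string-to-enum helper, e.g. when parsing a command-line option; the wrapper function is invented for illustration:

#include <errno.h>
#include <rte_cryptodev.h>

static int
parse_xform(const char *name, enum rte_crypto_asym_xform_type *xf)
{
	/* maps e.g. "rsa", "modexp", "dh" to the enum; -1 on unknown name */
	if (rte_cryptodev_asym_get_xform_enum(xf, name) != 0)
		return -EINVAL;
	return 0;
}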
+
/**
* The crypto auth operation strings identifiers.
* It could be used in application command line.
@@ -287,6 +330,28 @@ param_range_check(uint16_t size, const struct rte_crypto_param_range *range)
return -1;
}
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx)
+{
+ const struct rte_cryptodev_capabilities *capability;
+ struct rte_cryptodev_info dev_info;
+ unsigned int i = 0;
+
+ memset(&dev_info, 0, sizeof(struct rte_cryptodev_info));
+ rte_cryptodev_info_get(dev_id, &dev_info);
+
+ while ((capability = &dev_info.capabilities[i++])->op !=
+ RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ if (capability->op != RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ continue;
+
+ if (capability->asym.xform_capa.xform_type == idx->type)
+ return &capability->asym.xform_capa;
+ }
+ return NULL;
+}
+
int
rte_cryptodev_sym_capability_check_cipher(
const struct rte_cryptodev_symmetric_capability *capability,
@@ -338,6 +403,42 @@ rte_cryptodev_sym_capability_check_aead(
return 0;
}
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type)
+{
+ if (capability->op_types & (1 << op_type))
+ return 1;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen)
+{
+ /* no need to check for limits, if min or max = 0 */
+ if (capability->modlen.min != 0) {
+ if (modlen < capability->modlen.min)
+ return -1;
+ }
+
+ if (capability->modlen.max != 0) {
+ if (modlen > capability->modlen.max)
+ return -1;
+ }
+
+ /* in any case, check if given modlen is a multiple of the increment */
+ if (capability->modlen.increment != 0) {
+ if (modlen % (capability->modlen.increment))
+ return -1;
+ }
+
+ return 0;
+}
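A hedged sketch combining the new capability helpers: look up the RSA xform capability on a device and verify that both the sign op type and a 2048-bit modulus (length in bytes assumed here) are supported:

#include <rte_cryptodev.h>

static int
dev_supports_rsa_sign_2048(uint8_t dev_id)
{
	const struct rte_cryptodev_asymmetric_xform_capability *capa;
	struct rte_cryptodev_asym_capability_idx idx = {
		.type = RTE_CRYPTO_ASYM_XFORM_RSA,
	};

	capa = rte_cryptodev_asym_capability_get(dev_id, &idx);
	if (capa == NULL)
		return 0;

	if (!rte_cryptodev_asym_xform_capability_check_optype(capa,
			RTE_CRYPTO_ASYM_OP_SIGN))
		return 0;

	return rte_cryptodev_asym_xform_capability_check_modlen(capa,
			256) == 0;
}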
+
const char *
rte_cryptodev_get_feature_name(uint64_t flag)
@@ -361,8 +462,16 @@ rte_cryptodev_get_feature_name(uint64_t flag)
return "CPU_AESNI";
case RTE_CRYPTODEV_FF_HW_ACCELERATED:
return "HW_ACCELERATED";
- case RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER:
- return "MBUF_SCATTER_GATHER";
+ case RTE_CRYPTODEV_FF_IN_PLACE_SGL:
+ return "IN_PLACE_SGL";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT:
+ return "OOP_LB_IN_LB_OUT";
case RTE_CRYPTODEV_FF_CPU_NEON:
return "CPU_NEON";
case RTE_CRYPTODEV_FF_CPU_ARM_CE:
@@ -703,50 +812,6 @@ rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs,
}
int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_start(dev, queue_pair_id);
-
-}
-
-int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_stop(dev, queue_pair_id);
-
-}
-
-int
rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
{
struct rte_cryptodev *dev;
@@ -966,6 +1031,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
dev_info->driver_name = dev->device->driver->name;
+ dev_info->device = dev->device;
}
@@ -1098,8 +1164,46 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
index = dev->driver_id;
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_configure, -ENOTSUP);
+
if (sess->sess_private_data[index] == NULL) {
- ret = dev->dev_ops->session_configure(dev, xforms, sess, mp);
+ ret = dev->dev_ops->sym_session_configure(dev, xforms,
+ sess, mp);
+ if (ret < 0) {
+ CDEV_LOG_ERR(
+ "dev_id %d failed to configure session details",
+ dev_id);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mp)
+{
+ struct rte_cryptodev *dev;
+ uint8_t index;
+ int ret;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (sess == NULL || xforms == NULL || dev == NULL)
+ return -EINVAL;
+
+ index = dev->driver_id;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_configure,
+ -ENOTSUP);
+
+ if (sess->sess_private_data[index] == NULL) {
+ ret = dev->dev_ops->asym_session_configure(dev,
+ xforms,
+ sess, mp);
if (ret < 0) {
CDEV_LOG_ERR(
"dev_id %d failed to configure session details",
@@ -1123,70 +1227,53 @@ rte_cryptodev_sym_session_create(struct rte_mempool *mp)
}
/* Clear device session pointer.
- * Include the flag indicating presence of private data
+ * Include the flag indicating presence of user data
*/
memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
return sess;
}
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *sess)
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mp)
{
- struct rte_cryptodev *dev;
+ struct rte_cryptodev_asym_session *sess;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
+ /* Allocate a session structure from the session pool */
+ if (rte_mempool_get(mp, (void **)&sess)) {
+ CDEV_LOG_ERR("couldn't get object from session mempool");
+ return NULL;
}
- dev = &rte_crypto_devices[dev_id];
-
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0);
-
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
-
- if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session",
- dev_id, qp_id);
- return -EPERM;
- }
+ /* Clear device session pointer.
+ * Include the flag indicating presence of private data
+ */
+ memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
- return 0;
+ return sess;
}
int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
+rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0);
+ if (dev == NULL || sess == NULL)
+ return -EINVAL;
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_clear, -ENOTSUP);
- if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session",
- dev_id, qp_id);
- return -EPERM;
- }
+ dev->dev_ops->sym_session_clear(dev, sess);
return 0;
}
-int
-rte_cryptodev_sym_session_clear(uint8_t dev_id,
- struct rte_cryptodev_sym_session *sess)
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess)
{
struct rte_cryptodev *dev;
@@ -1195,7 +1282,9 @@ rte_cryptodev_sym_session_clear(uint8_t dev_id,
if (dev == NULL || sess == NULL)
return -EINVAL;
- dev->dev_ops->session_clear(dev, sess);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_clear, -ENOTSUP);
+
+ dev->dev_ops->asym_session_clear(dev, sess);
return 0;
}
@@ -1212,7 +1301,7 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
/* Check that all device private data has been freed */
for (i = 0; i < nb_drivers; i++) {
- sess_priv = get_session_private_data(sess, i);
+ sess_priv = get_sym_session_private_data(sess, i);
if (sess_priv != NULL)
return -EBUSY;
}
@@ -1224,27 +1313,51 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
return 0;
}
-unsigned int
-rte_cryptodev_get_header_session_size(void)
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess)
{
- return rte_cryptodev_sym_get_header_session_size();
+ uint8_t i;
+ void *sess_priv;
+ struct rte_mempool *sess_mp;
+
+ if (sess == NULL)
+ return -EINVAL;
+
+ /* Check that all device private data has been freed */
+ for (i = 0; i < nb_drivers; i++) {
+ sess_priv = get_asym_session_private_data(sess, i);
+ if (sess_priv != NULL)
+ return -EBUSY;
+ }
+
+ /* Return session to mempool */
+ sess_mp = rte_mempool_from_obj(sess);
+ rte_mempool_put(sess_mp, sess);
+
+ return 0;
}
+
unsigned int
rte_cryptodev_sym_get_header_session_size(void)
{
/*
* Header contains pointers to the private data
* of all registered drivers, and a flag which
- * indicates presence of private data
+ * indicates presence of user data
*/
return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
}
-unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id)
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void)
{
- return rte_cryptodev_sym_get_private_session_size(dev_id);
+ /*
+ * Header contains pointers to the private data
+ * of all registered drivers, and a flag which
+ * indicates presence of private data
+ */
+ return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
}
unsigned int
@@ -1259,10 +1372,10 @@ rte_cryptodev_sym_get_private_session_size(uint8_t dev_id)
dev = rte_cryptodev_pmd_get_dev(dev_id);
- if (*dev->dev_ops->session_get_size == NULL)
+ if (*dev->dev_ops->sym_session_get_size == NULL)
return 0;
- priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
+ priv_sess_size = (*dev->dev_ops->sym_session_get_size)(dev);
/*
* If size is less than session header size,
@@ -1276,32 +1389,55 @@ rte_cryptodev_sym_get_private_session_size(uint8_t dev_id)
}
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id)
+{
+ struct rte_cryptodev *dev;
+ unsigned int header_size = sizeof(void *) * nb_drivers;
+ unsigned int priv_sess_size;
+
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id))
+ return 0;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (*dev->dev_ops->asym_session_get_size == NULL)
+ return 0;
+
+ priv_sess_size = (*dev->dev_ops->asym_session_get_size)(dev);
+ if (priv_sess_size < header_size)
+ return header_size;
+
+ return priv_sess_size;
+
+}
+
int __rte_experimental
-rte_cryptodev_sym_session_set_private_data(
+rte_cryptodev_sym_session_set_user_data(
struct rte_cryptodev_sym_session *sess,
void *data,
uint16_t size)
{
uint16_t off_set = sizeof(void *) * nb_drivers;
- uint8_t *private_data_present = (uint8_t *)sess + off_set;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
if (sess == NULL)
return -EINVAL;
- *private_data_present = 1;
+ *user_data_present = 1;
off_set += sizeof(uint8_t);
rte_memcpy((uint8_t *)sess + off_set, data, size);
return 0;
}
void * __rte_experimental
-rte_cryptodev_sym_session_get_private_data(
+rte_cryptodev_sym_session_get_user_data(
struct rte_cryptodev_sym_session *sess)
{
uint16_t off_set = sizeof(void *) * nb_drivers;
- uint8_t *private_data_present = (uint8_t *)sess + off_set;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
- if (sess == NULL || !*private_data_present)
+ if (sess == NULL || !*user_data_present)
return NULL;
off_set += sizeof(uint8_t);
@@ -1335,9 +1471,17 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
struct rte_crypto_op_pool_private *priv;
unsigned elt_size = sizeof(struct rte_crypto_op) +
- sizeof(struct rte_crypto_sym_op) +
priv_size;
+ if (type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_sym_op);
+ } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_asym_op);
+ } else {
+ CDEV_LOG_ERR("Invalid op_type\n");
+ return NULL;
+ }
+
/* lookup mempool in case already allocated */
struct rte_mempool *mp = rte_mempool_lookup(name);
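With the element size now depending on the op type, an asymmetric op pool is created the same way as a symmetric one, only with the new type constant. Pool name and sizing below are illustrative values:

#include <rte_crypto.h>

static struct rte_mempool *
create_asym_op_pool(void)
{
	return rte_crypto_op_pool_create("asym_op_pool",
			RTE_CRYPTO_OP_TYPE_ASYMMETRIC,
			1024,		/* number of elements */
			128,		/* per-lcore cache size */
			0,		/* no private data area */
			SOCKET_ID_ANY);
}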
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index 92ce6d49..4099823f 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_H_
@@ -65,7 +38,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
RTE_FMT_TAIL(__VA_ARGS__,)))
-#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
#define CDEV_LOG_DEBUG(...) \
RTE_LOG(DEBUG, CRYPTODEV, \
RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
@@ -76,13 +48,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#else
-#define CDEV_LOG_DEBUG(...) (void)0
-#define CDEV_PMD_TRACE(...) (void)0
-#endif
-
-
-
/**
* A macro that points to an offset from the start
* of the crypto operation structure (rte_crypto_op)
@@ -178,6 +143,35 @@ struct rte_cryptodev_symmetric_capability {
};
};
+/**
+ * Asymmetric Xform Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_xform_capability {
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Transform type: RSA/MODEXP/DH/DSA/MODINV */
+
+ uint32_t op_types;
+ /**< bitmask for supported rte_crypto_asym_op_type */
+
+ __extension__
+ union {
+ struct rte_crypto_param_range modlen;
+ /**< Range of modulus length supported by modulus based xform.
+ * Value 0 means implementation default
+ */
+ };
+};
+
+/**
+ * Asymmetric Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_capability {
+ struct rte_cryptodev_asymmetric_xform_capability xform_capa;
+};
+
+
/** Structure used to capture a capability of a crypto device */
struct rte_cryptodev_capabilities {
enum rte_crypto_op_type op;
@@ -187,6 +181,8 @@ struct rte_cryptodev_capabilities {
union {
struct rte_cryptodev_symmetric_capability sym;
/**< Symmetric operation capability parameters */
+ struct rte_cryptodev_asymmetric_capability asym;
+ /**< Asymmetric operation capability parameters */
};
};
@@ -201,7 +197,17 @@ struct rte_cryptodev_sym_capability_idx {
};
/**
- * Provide capabilities available for defined device and algorithm
+ * Structure used to describe asymmetric crypto xforms
+ * Each xform maps to one asym algorithm.
+ *
+ */
+struct rte_cryptodev_asym_capability_idx {
+ enum rte_crypto_asym_xform_type type;
+ /**< Asymmetric xform (algo) type */
+};
+
+/**
+ * Provide capabilities available for defined device and algorithm
*
* @param dev_id The identifier of the device.
* @param idx Description of crypto algorithms.
@@ -215,6 +221,20 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
const struct rte_cryptodev_sym_capability_idx *idx);
/**
+ * Provide capabilities available for defined device and xform
+ *
+ * @param dev_id The identifier of the device.
+ * @param idx Description of asym crypto xform.
+ *
+ * @return
+ * - Return description of the asymmetric crypto capability if it exists.
+ * - Return NULL if the capability does not exist.
+ */
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx);
+
+/**
* Check if key size and initial vector are supported
* in crypto cipher capability
*
@@ -270,6 +290,36 @@ rte_cryptodev_sym_capability_check_aead(
uint16_t iv_size);
/**
+ * Check if op type is supported
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param op_type op type
+ *
+ * @return
+ * - Return 1 if the op type is supported
+ * - Return 0 if unsupported
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type);
+
+/**
+ * Check if modulus length is in supported range
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param modlen modulus length.
+ *
+ * @return
+ * - Return 0 if the parameters are in range of the capability.
+ * - Return -1 if the parameters are out of range of the capability.
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen);
+
+/**
* Provide the cipher algorithm enum, given an algorithm string
*
* @param algo_enum A pointer to the cipher algorithm
@@ -314,6 +364,22 @@ int
rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
const char *algo_string);
+/**
+ * Provide the Asymmetric xform enum, given an xform string
+ *
+ * @param xform_enum A pointer to the xform type
+ * enum to be filled
+ * @param xform_string xform string
+ *
+ * @return
+ * - Return -1 if string is not valid
+ * - Return 0 if the string is valid
+ */
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string);
+
+
/** Macro used at end of crypto PMD list */
#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \
{ RTE_CRYPTO_OP_TYPE_UNDEFINED }
@@ -327,31 +393,50 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
*
* Keep these flags synchronised with rte_cryptodev_get_feature_name()
*/
-#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
+#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
/**< Symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
+#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
/**< Asymmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
+#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
/**< Chaining symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
+#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
/**< Utilises CPU SIMD SSE instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
+#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
/**< Utilises CPU SIMD AVX instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
+#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
/**< Utilises CPU SIMD AVX2 instructions */
-#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
+#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
/**< Utilises CPU AES-NI instructions */
-#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
-/**< Operations are off-loaded to an external hardware accelerator */
-#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
+#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
+/**< Operations are off-loaded to an
+ * external hardware accelerator
+ */
+#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
/**< Utilises CPU SIMD AVX512 instructions */
-#define RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER (1ULL << 9)
-/**< Scatter-gather mbufs are supported */
-#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 10)
+#define RTE_CRYPTODEV_FF_IN_PLACE_SGL (1ULL << 9)
+/**< In-place Scatter-gather (SGL) buffers, with multiple segments,
+ * are supported
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT (1ULL << 10)
+/**< Out-of-place Scatter-gather (SGL) buffers are
+ * supported in input and output
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT (1ULL << 11)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment in output
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT (1ULL << 12)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT (1ULL << 13)
+/**< Out-of-place linear buffers (LB) are supported in input and output */
+#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 14)
/**< Utilises CPU NEON instructions */
-#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11)
+#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 15)
/**< Utilises ARM CPU Cryptographic Extensions */
-#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12)
+#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 16)
/**< Support Security Protocol Processing */
@@ -369,9 +454,9 @@ rte_cryptodev_get_feature_name(uint64_t flag);
/** Crypto device information */
struct rte_cryptodev_info {
- const char *driver_name; /**< Driver name. */
- uint8_t driver_id; /**< Driver identifier */
- struct rte_pci_device *pci_dev; /**< PCI information. */
+ const char *driver_name; /**< Driver name. */
+ uint8_t driver_id; /**< Driver identifier */
+ struct rte_device *device; /**< Generic device information. */
uint64_t feature_flags;
/**< Feature flags exposes HW/SW features for the given device */
@@ -382,12 +467,17 @@ struct rte_cryptodev_info {
unsigned max_nb_queue_pairs;
/**< Maximum number of queues pairs supported by device. */
+ uint16_t min_mbuf_headroom_req;
+ /**< Minimum mbuf headroom required by device */
+
+ uint16_t min_mbuf_tailroom_req;
+ /**< Minimum mbuf tailroom required by device */
+
struct {
unsigned max_nb_sessions;
- /**< Maximum number of sessions supported by device. */
- unsigned int max_nb_sessions_per_qp;
- /**< Maximum number of sessions per queue pair.
- * Default 0 for infinite sessions
+ /**< Maximum number of sessions supported by device.
+ * If 0, the device does not have any limitation in
+ * number of sessions that can be used.
*/
} sym;
};
@@ -603,43 +693,6 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
struct rte_mempool *session_pool);
/**
- * @deprecated
- * Start a specified queue pair of a device. It is used
- * when deferred_start flag of the specified queue is true.
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to start. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_crypto_dev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-__rte_deprecated
-extern int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
- * @deprecated
- * Stop specified queue pair of a device
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to stop. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_cryptodev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-__rte_deprecated
-extern int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
* Get the number of queue pairs on a specific crypto device
*
* @param dev_id Crypto device identifier.
@@ -902,9 +955,14 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
*/
struct rte_cryptodev_sym_session {
__extension__ void *sess_private_data[0];
- /**< Private session material */
+ /**< Private symmetric session material */
};
+/** Cryptodev asymmetric crypto session */
+struct rte_cryptodev_asym_session {
+ __extension__ void *sess_private_data[0];
+ /**< Private asymmetric session material */
+};
/**
* Create symmetric crypto session header (generic with no private data)
@@ -919,6 +977,18 @@ struct rte_cryptodev_sym_session *
rte_cryptodev_sym_session_create(struct rte_mempool *mempool);
/**
+ * Create asymmetric crypto session header (generic with no private data)
+ *
+ * @param mempool mempool to allocate asymmetric session
+ * objects from
+ * @return
+ * - On success return pointer to asym-session
+ * - On failure returns NULL
+ */
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mempool);
+
+/**
* Frees symmetric crypto session header, after checking that all
* the device private data has been freed, returning it
* to its original mempool.
@@ -934,6 +1004,21 @@ int
rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
/**
+ * Frees asymmetric crypto session header, after checking that all
+ * the device private data has been freed, returning it
+ * to its original mempool.
+ *
+ * @param sess Session header to be freed.
+ *
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if session is NULL.
+ * - -EBUSY if not all device private data has been freed.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess);
+
+/**
* Fill out private data for the device id, based on its device type.
*
* @param dev_id ID of device that we want the session to be used on
@@ -945,7 +1030,8 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
* @return
* - On success, zero.
* - -EINVAL if input parameters are invalid.
- * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOTSUP if crypto device does not support the crypto transform or
+ * does not support symmetric operations.
* - -ENOMEM if the private session could not be allocated.
*/
int
@@ -955,6 +1041,27 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
struct rte_mempool *mempool);
/**
+ * Initialize asymmetric session on a device with specific asymmetric xform
+ *
+ * @param dev_id ID of device that we want the session to be used on
+ * @param sess Session to be set up on a device
+ * @param xforms Asymmetric crypto transform operations to apply on flow
+ * processed with this session
+ * @param mempool Mempool to be used for internal allocation.
+ *
+ * @return
+ * - On success, zero.
+ * - -EINVAL if input parameters are invalid.
+ * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOMEM if the private session could not be allocated.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mempool);
+
+/**
* Frees private data for the device id, based on its device type,
* returning it to its mempool. It is the application's responsibility
* to ensure that private session data is not cleared while there are
@@ -966,44 +1073,43 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
* @return
* - 0 if successful.
* - -EINVAL if device is invalid or session is NULL.
+ * - -ENOTSUP if crypto device does not support symmetric operations.
*/
int
rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess);
/**
- * @deprecated
- * Get the size of the header session, for all registered drivers.
+ * Frees the resources held by an asymmetric session that were allocated during rte_cryptodev_asym_session_init()
*
+ * @param dev_id ID of device that uses the asymmetric session.
+ * @param sess Asymmetric session set up on a device using
+ * rte_cryptodev_asym_session_init()
* @return
- * Size of the header session.
+ * - 0 if successful.
+ * - -EINVAL if device is invalid or session is NULL.
*/
-__rte_deprecated
-unsigned int
-rte_cryptodev_get_header_session_size(void);
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess);
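Taken together, these entry points mirror the symmetric session lifecycle. A minimal usage sketch follows; it is not part of the patch, and dev_id, the asym_sess_mp mempool and the modexp buffers are placeholders assumed to be set up by the application:

	/* Sketch: asymmetric session lifecycle (placeholder device/mempool). */
	struct rte_crypto_asym_xform xform = {
		.next = NULL,
		.xform_type = RTE_CRYPTO_ASYM_XFORM_MODEX,
		.modex = {
			.modulus = { .data = mod_data, .length = mod_len },
			.exponent = { .data = exp_data, .length = exp_len },
		},
	};
	struct rte_cryptodev_asym_session *sess;

	sess = rte_cryptodev_asym_session_create(asym_sess_mp);
	if (sess == NULL)
		rte_exit(EXIT_FAILURE, "cannot create asym session\n");
	if (rte_cryptodev_asym_session_init(dev_id, sess, &xform,
			asym_sess_mp) < 0)
		rte_exit(EXIT_FAILURE, "cannot init asym session\n");

	/* build and enqueue asymmetric crypto ops referencing sess here */

	rte_cryptodev_asym_session_clear(dev_id, sess);
	rte_cryptodev_asym_session_free(sess);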
/**
- * @deprecated
- * Get the size of the private session data for a device.
- *
- * @param dev_id The device identifier.
+ * Get the size of the header session, for all registered drivers.
*
* @return
- * - Size of the private data, if successful
- * - 0 if device is invalid or does not have private session
+ * Size of the symmetric header session.
*/
-__rte_deprecated
unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id);
+rte_cryptodev_sym_get_header_session_size(void);
/**
- * Get the size of the header session, for all registered drivers.
+ * Get the size of the asymmetric session header, for all registered drivers.
*
* @return
- * Size of the symmetric header session.
+ * Size of the asymmetric header session.
*/
-unsigned int
-rte_cryptodev_sym_get_header_session_size(void);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void);
/**
* Get the size of the private symmetric session data
@@ -1020,40 +1126,17 @@ unsigned int
rte_cryptodev_sym_get_private_session_size(uint8_t dev_id);
/**
- * @deprecated
- * Attach queue pair with sym session.
- *
- * @param dev_id Device to which the session will be attached.
- * @param qp_id Queue pair to which the session will be attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
- *
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-__rte_deprecated
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
-
-/**
- * @deprecated
- * Detach queue pair with sym session.
+ * Get the size of the private data for an asymmetric session
+ * on a device
*
- * @param dev_id Device to which the session is attached.
- * @param qp_id Queue pair to which the session is attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
+ * @param dev_id The device identifier.
*
* @return
- * - On success, zero.
- * - On failure, a negative value.
+ * - Size of the asymmetric private data, if successful
+ * - 0 if device is invalid or does not have private session
*/
-__rte_deprecated
-int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id);
/**
* Provide driver identifier.
@@ -1076,35 +1159,35 @@ int rte_cryptodev_driver_id_get(const char *name);
const char *rte_cryptodev_driver_name_get(uint8_t driver_id);
/**
- * Set private data for a session.
+ * Store user data in a session.
*
* @param sess Session pointer allocated by
* *rte_cryptodev_sym_session_create*.
- * @param data Pointer to the private data.
- * @param size Size of the private data.
+ * @param data Pointer to the user data.
+ * @param size Size of the user data.
*
* @return
* - On success, zero.
* - On failure, a negative value.
*/
int __rte_experimental
-rte_cryptodev_sym_session_set_private_data(
+rte_cryptodev_sym_session_set_user_data(
struct rte_cryptodev_sym_session *sess,
void *data,
uint16_t size);
/**
- * Get private data of a session.
+ * Get user data stored in a session.
*
* @param sess Session pointer allocated by
* *rte_cryptodev_sym_session_create*.
*
* @return
- * - On success return pointer to private data.
+ * - On success return pointer to user data.
* - On failure returns NULL.
*/
void * __rte_experimental
-rte_cryptodev_sym_session_get_private_data(
+rte_cryptodev_sym_session_get_user_data(
struct rte_cryptodev_sym_session *sess);
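A short usage sketch of the renamed helpers; sess is assumed to be an existing symmetric session, and app_flow_ctx is a hypothetical application structure, not part of this patch:

	/* Sketch: attach an application context to a symmetric session. */
	struct app_flow_ctx {
		uint32_t flow_id; /* hypothetical per-flow identifier */
	} ctx = { .flow_id = 42 };
	struct app_flow_ctx *stored;

	if (rte_cryptodev_sym_session_set_user_data(sess, &ctx,
			sizeof(ctx)) != 0)
		printf("session has no room for user data\n");
	stored = rte_cryptodev_sym_session_get_user_data(sess);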
#ifdef __cplusplus
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c
index f2aac24b..2088ac3f 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.c
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c
@@ -66,13 +66,6 @@ rte_cryptodev_pmd_parse_input_args(
goto free_kvlist;
ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
- &rte_cryptodev_pmd_parse_uint_arg,
- &params->max_nb_sessions);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG,
&rte_cryptodev_pmd_parse_uint_arg,
&params->socket_id);
@@ -109,10 +102,9 @@ rte_cryptodev_pmd_create(const char *name,
device->driver->name, name);
CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s,"
- "socket id: %d, max queue pairs: %u, max sessions: %u",
+ "socket id: %d, max queue pairs: %u",
device->driver->name, name,
- params->socket_id, params->max_nb_queue_pairs,
- params->max_nb_sessions);
+ params->socket_id, params->max_nb_queue_pairs);
/* allocate device structure */
cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id);
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 69d77693..6ff49d64 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2016 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_PMD_H_
@@ -59,18 +32,15 @@ extern "C" {
#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8
-#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048
#define RTE_CRYPTODEV_PMD_NAME_ARG ("name")
#define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions")
#define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id")
static const char * const cryptodev_pmd_valid_params[] = {
RTE_CRYPTODEV_PMD_NAME_ARG,
RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG
};
@@ -83,7 +53,6 @@ struct rte_cryptodev_pmd_init_params {
size_t private_data_size;
int socket_id;
unsigned int max_nb_queue_pairs;
- unsigned int max_nb_sessions;
};
/** Global structure used for maintaining state of allocated crypto devices */
@@ -216,28 +185,6 @@ typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_info *dev_info);
/**
- * Start queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
- * Stop queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
* Setup a queue pair for a device.
*
* @param dev Crypto device pointer
@@ -302,6 +249,17 @@ typedef int (*cryptodev_sym_create_session_pool_t)(
*/
typedef unsigned (*cryptodev_sym_get_session_private_size_t)(
struct rte_cryptodev *dev);
+/**
+ * Get the size of an asymmetric cryptodev session
+ *
+ * @param dev Crypto device pointer
+ *
+ * @return
+ * - On success returns the size of the session structure for device
+ * - On failure returns 0
+ */
+typedef unsigned int (*cryptodev_asym_get_session_private_size_t)(
+ struct rte_cryptodev *dev);
/**
* Configure a Crypto session on a device.
@@ -321,7 +279,24 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
struct rte_crypto_sym_xform *xform,
struct rte_cryptodev_sym_session *session,
struct rte_mempool *mp);
-
+/**
+ * Configure a Crypto asymmetric session on a device.
+ *
+ * @param dev Crypto device pointer
+ * @param xform Single or chain of crypto xforms
+ * @param session Pointer to cryptodev's private session structure
+ * @param mp Mempool where the private session is allocated
+ *
+ * @return
+ * - Returns 0 if the private session structure has been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ * - Returns -ENOMEM if the private session could not be allocated.
+ */
+typedef int (*cryptodev_asym_configure_session_t)(struct rte_cryptodev *dev,
+ struct rte_crypto_asym_xform *xform,
+ struct rte_cryptodev_asym_session *session,
+ struct rte_mempool *mp);
/**
* Free driver private session data.
*
@@ -330,32 +305,14 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
*/
typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_sym_session *sess);
-
-/**
- * Optional API for drivers to attach sessions with queue pair.
- * @param dev Crypto device pointer
- * @param qp_id queue pair id for attaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
- */
-typedef int (*cryptodev_sym_queue_pair_attach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
-
/**
- * Optional API for drivers to detach sessions from queue pair.
+ * Free asymmetric session private data.
+ *
* @param dev Crypto device pointer
- * @param qp_id queue pair id for detaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
+ * @param sess Cryptodev session structure
*/
-typedef int (*cryptodev_sym_queue_pair_detach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
+typedef void (*cryptodev_asym_free_session_t)(struct rte_cryptodev *dev,
+ struct rte_cryptodev_asym_session *sess);
/** Crypto device operations function pointer table */
struct rte_cryptodev_ops {
@@ -375,23 +332,21 @@ struct rte_cryptodev_ops {
/**< Set up a device queue pair. */
cryptodev_queue_pair_release_t queue_pair_release;
/**< Release a queue pair. */
- cryptodev_queue_pair_start_t queue_pair_start;
- /**< Start a queue pair. */
- cryptodev_queue_pair_stop_t queue_pair_stop;
- /**< Stop a queue pair. */
cryptodev_queue_pair_count_t queue_pair_count;
/**< Get count of the queue pairs. */
- cryptodev_sym_get_session_private_size_t session_get_size;
+ cryptodev_sym_get_session_private_size_t sym_session_get_size;
/**< Return sym session private size. */
- cryptodev_sym_configure_session_t session_configure;
+ cryptodev_asym_get_session_private_size_t asym_session_get_size;
+ /**< Return asym session private size. */
+ cryptodev_sym_configure_session_t sym_session_configure;
/**< Configure a Crypto session. */
- cryptodev_sym_free_session_t session_clear;
+ cryptodev_asym_configure_session_t asym_session_configure;
+ /**< Configure asymmetric Crypto session. */
+ cryptodev_sym_free_session_t sym_session_clear;
+ /**< Clear a Crypto session's private data. */
+ cryptodev_asym_free_session_t asym_session_clear;
/**< Clear an asymmetric Crypto session's private data. */
- cryptodev_sym_queue_pair_attach_session_t qp_attach_session;
- /**< Attach session to queue pair. */
- cryptodev_sym_queue_pair_detach_session_t qp_detach_session;
- /**< Detach session from queue pair. */
};
@@ -516,20 +471,32 @@ uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv,
#define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\
-RTE_INIT(init_ ##driver_id);\
-static void init_ ##driver_id(void)\
+RTE_INIT(init_ ##driver_id)\
{\
driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv));\
}
static inline void *
-get_session_private_data(const struct rte_cryptodev_sym_session *sess,
+get_sym_session_private_data(const struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id) {
+ return sess->sess_private_data[driver_id];
+}
+
+static inline void
+set_sym_session_private_data(struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id, void *private_data)
+{
+ sess->sess_private_data[driver_id] = private_data;
+}
+
+static inline void *
+get_asym_session_private_data(const struct rte_cryptodev_asym_session *sess,
uint8_t driver_id) {
return sess->sess_private_data[driver_id];
}
static inline void
-set_session_private_data(struct rte_cryptodev_sym_session *sess,
+set_asym_session_private_data(struct rte_cryptodev_asym_session *sess,
uint8_t driver_id, void *private_data)
{
sess->sess_private_data[driver_id] = private_data;
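For PMD maintainers, the ops table now carries one callback set per session type. A hedged sketch of how a driver might populate the renamed fields; the example_* handlers are placeholders for a driver's own implementations:

	/* Sketch: ops table with per-session-type callbacks (placeholders). */
	static struct rte_cryptodev_ops example_crypto_ops = {
		/* dev_*, stats_* and queue_pair_* callbacks omitted */
		.sym_session_get_size = example_sym_session_get_size,
		.asym_session_get_size = example_asym_session_get_size,
		.sym_session_configure = example_sym_session_configure,
		.asym_session_configure = example_asym_session_configure,
		.sym_session_clear = example_sym_session_clear,
		.asym_session_clear = example_asym_session_clear,
	};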
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index be8f4c1a..7ca00735 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -22,8 +22,6 @@ DPDK_16.04 {
rte_cryptodev_stop;
rte_cryptodev_queue_pair_count;
rte_cryptodev_queue_pair_setup;
- rte_cryptodev_queue_pair_start;
- rte_cryptodev_queue_pair_stop;
rte_crypto_op_pool_create;
local: *;
@@ -52,8 +50,6 @@ DPDK_17.05 {
rte_cryptodev_get_auth_algo_enum;
rte_cryptodev_get_cipher_algo_enum;
- rte_cryptodev_queue_pair_attach_sym_session;
- rte_cryptodev_queue_pair_detach_sym_session;
} DPDK_17.02;
@@ -65,8 +61,6 @@ DPDK_17.08 {
rte_cryptodev_driver_id_get;
rte_cryptodev_driver_name_get;
rte_cryptodev_get_aead_algo_enum;
- rte_cryptodev_get_header_session_size;
- rte_cryptodev_get_private_session_size;
rte_cryptodev_sym_capability_check_aead;
rte_cryptodev_sym_session_init;
rte_cryptodev_sym_session_clear;
@@ -97,6 +91,18 @@ DPDK_18.05 {
EXPERIMENTAL {
global:
- rte_cryptodev_sym_session_get_private_data;
- rte_cryptodev_sym_session_set_private_data;
+ rte_cryptodev_asym_capability_get;
+ rte_cryptodev_asym_get_header_session_size;
+ rte_cryptodev_asym_get_private_session_size;
+ rte_cryptodev_asym_get_xform_enum;
+ rte_cryptodev_asym_session_clear;
+ rte_cryptodev_asym_session_create;
+ rte_cryptodev_asym_session_free;
+ rte_cryptodev_asym_session_init;
+ rte_cryptodev_asym_xform_capability_check_modlen;
+ rte_cryptodev_asym_xform_capability_check_optype;
+ rte_cryptodev_sym_session_get_user_data;
+ rte_cryptodev_sym_session_set_user_data;
+ rte_crypto_asym_op_strings;
+ rte_crypto_asym_xform_strings;
};
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 3fd33f1e..d27da3d1 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -18,10 +18,11 @@ CFLAGS += $(WERROR_FLAGS) -O3
LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 7
+LIBABIVER := 8
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -52,12 +53,14 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index dc279542..d7ae9d68 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -147,19 +147,9 @@ eal_get_runtime_dir(void)
}
/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
-{
- return internal_config.user_mbuf_pool_ops_name;
-}
-
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+rte_eal_mbuf_user_pool_ops(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
-
return internal_config.user_mbuf_pool_ops_name;
}
@@ -286,12 +276,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -468,6 +463,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -600,13 +603,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- /* create runtime data directory */
- if (eal_create_runtime_dir() < 0) {
- rte_eal_init_alert("Cannot create runtime directory\n");
- rte_errno = EACCES;
- return -1;
- }
-
/* FreeBSD always uses legacy memory model */
internal_config.legacy_mem = true;
@@ -625,6 +621,11 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
/* Put mp channel init before bus scan so that we can init the vdev
* bus through mp channel in the secondary process before the bus scan.
*/
@@ -713,11 +714,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
if (rte_eal_timer_init() < 0) {
rte_eal_init_alert("Cannot init HPET or TSC timers\n");
rte_errno = ENOTSUP;
@@ -866,21 +862,21 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
return 0;
}
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *iommu_group_num)
@@ -888,45 +884,45 @@ rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_container_fd(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
@@ -935,7 +931,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c9..51ea4b8c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include <errno.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
int
rte_eal_alarm_init(void)
{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+ /* on FreeBSD, timers don't use fds, and their identifiers are stored
+ * in a separate namespace from fds, so using any value is OK. However,
+ * the EAL interrupt handler expects fds to be unique, so use an actual
+ * fd to guarantee a unique timer identifier.
+ */
+ intr_handle.fd = open("/dev/zero", O_RDONLY);
+
+ return 0;
+}
+
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
return 0;
}
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
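A quick worked example of diff_ns(), with assumed values:

	/* With now = {1 s, 900000000 ns} and at = {2 s, 100000000 ns}:
	 *   at_ns - now_ns = 2100000000 - 1900000000 = 200000000 ns (200 ms).
	 * If now is at or past at, the alarm is already due and 0 is returned.
	 */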
int
-rte_eal_alarm_set(uint64_t us __rte_unused,
- rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+eal_alarm_get_timeout_ns(uint64_t *val)
{
- return -ENOTSUP;
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
+ return 0;
+}
+
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
}
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ ap = LIST_FIRST(&alarm_list);
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) {
+ rte_spinlock_unlock(&alarm_list_lk);
+ return;
+ }
+
+ while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+
+ ap = LIST_FIRST(&alarm_list);
+ }
+
+ /* timer has been deleted from the kqueue, so recreate it if needed */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+
int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+ struct alarm_entry *ap, *new_alarm;
+ struct timespec now;
+ uint64_t ns;
+ int ret = 0;
+
+ /* check parameters, also ensure us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ ns = us * NS_PER_US;
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ /* re-register first callback just in case */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If called from another context, mark that the
+ * alarm is executing so the loop can spin until it
+ * finishes. Otherwise we are trying to cancel
+ * ourselves - mark it with EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return count;
}
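With kqueue timers wired into the interrupt thread, the generic alarm API is now functional on FreeBSD. A small usage sketch; the 500 ms period and the callback are illustrative, not part of the patch:

	/* Sketch: a self-re-arming 500 ms alarm (illustrative values). */
	static void
	heartbeat_cb(void *arg)
	{
		RTE_LOG(INFO, USER1, "alarm fired, arg=%p\n", arg);
		/* re-arm from the callback for a periodic effect */
		rte_eal_alarm_set(500 * 1000 /* us */, heartbeat_cb, arg);
	}

	static void
	start_heartbeat(void)
	{
		if (rte_eal_alarm_set(500 * 1000, heartbeat_cb, NULL) < 0)
			RTE_LOG(ERR, USER1, "cannot set alarm\n");
	}

	static void
	stop_heartbeat(void)
	{
		/* (void *)-1 matches any cb_arg, as in the code above */
		rte_eal_alarm_cancel(heartbeat_cb, (void *)-1);
	}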
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 00000000..65c71151
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between the interrupt
+ * and alarm threads, because on FreeBSD the timer period is set up inside the
+ * interrupt API and not inside the alarm API as it is on Linux.
+ */
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index 836feb67..1e8f5df2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -101,6 +101,10 @@ eal_hugepage_info_init(void)
hpi->num_pages[0] = num_buffers;
hpi->lock_descriptor = fd;
+ /* for no shared files mode, do not create shared memory config */
+ if (internal_config.no_shconf)
+ return 0;
+
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
sizeof(internal_config.hugepage_info));
if (tmp_hpi == NULL ) {
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 290d53ab..2feee2d5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -1,51 +1,479 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <string.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
#include <rte_common.h>
#include <rte_interrupts.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define MAX_INTR_EVENTS 16
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+static volatile int kq = -1;
+
+static int
+intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
+{
+ /* alarm callbacks are a special case */
+ if (ih->type == RTE_INTR_HANDLE_ALARM) {
+ uint64_t timeout_ns;
+
+ /* get soonest alarm timeout */
+ if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+ return -1;
+
+ ke->filter = EVFILT_TIMER;
+ /* timers are one shot */
+ ke->flags |= EV_ONESHOT;
+ ke->fflags = NOTE_NSECONDS;
+ ke->data = timeout_ns;
+ } else {
+ ke->filter = EVFILT_READ;
+ }
+ ke->ident = ih->fd;
+
+ return 0;
+}
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ struct rte_intr_callback *callback = NULL;
+ struct rte_intr_source *src = NULL;
+ int ret, add_event = 0;
- return -ENOTSUP;
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq);
+ return -ENODEV;
+ }
+
+ /* allocate a new interrupt callback entity */
+ callback = calloc(1, sizeof(*callback));
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd) {
+ /* we had no interrupts for this */
+ if (TAILQ_EMPTY(&src->callbacks))
+ add_event = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ ret = 0;
+ break;
+ }
+ }
+
+ /* no existing callbacks for this - add new source */
+ if (src == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ add_event = 1;
+ ret = 0;
+ }
+ }
+
+ /* add events to the queue. timer events are special as we need to
+ * re-set the timer.
+ */
+ if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
+ struct kevent ke;
+
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_ADD; /* mark for addition to the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
+ /**
+ * add the intr file descriptor into wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ /* currently, nic_uio does not support interrupts, so
+ * this error will always be triggered and output to the
+ * user; don't output it unless the debug log level is set.
+ */
+ if (errno == ENODEV)
+ RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n",
+ src->intr_handle.fd);
+ else
+ RTE_LOG(ERR, EAL, "Error adding fd %d "
+ "kevent, %s\n",
+ src->intr_handle.fd,
+ strerror(errno));
+ ret = -errno;
+ goto fail;
+ }
+ }
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+fail:
+ /* clean up */
+ if (src != NULL) {
+ TAILQ_REMOVE(&(src->callbacks), callback, next);
+ if (TAILQ_EMPTY(&(src->callbacks))) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+ free(callback);
+ rte_spinlock_unlock(&intr_lock);
+ return ret;
}
int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb_fn, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
- return -ENOTSUP;
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if an interrupt source for the fd already exists */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ struct kevent ke;
+
+ ret = 0;
+
+ /* remove it from the kqueue */
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_DELETE; /* mark for deletion from the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ /* removing a non-existent event is an expected condition
+ * in some circumstances (e.g. oneshot events).
+ */
+ }
+
+ /* walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+out:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
}
int
-rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
}
int
-rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+eal_intr_process_interrupts(struct kevent *events, int nfds)
+{
+ struct rte_intr_callback active_cb;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback *cb;
+ struct rte_intr_source *src;
+ bool call = false;
+ int n, bytes_read;
+
+ for (n = 0; n < nfds; n++) {
+ int event_fd = events[n].ident;
+
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == event_fd)
+ break;
+ if (src == NULL) {
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read for different handle types */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_VDEV:
+ case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (bytes_read > 0) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for the next kevent() wait.
+ */
+ bytes_read = read(event_fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ event_fd,
+ strerror(errno));
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", event_fd);
+ else
+ call = true;
+ }
+
+ /* grab the lock again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (call) {
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(active_cb.cb_arg);
+
+ /* get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+
+ /* we are done with that interrupt source, release it. */
+ src->active = 0;
+ rte_spinlock_unlock(&intr_lock);
+ }
+}
+
+static void *
+eal_intr_thread_main(void *arg __rte_unused)
+{
+ struct kevent events[MAX_INTR_EVENTS];
+ int nfds;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* do not change anything, just wait */
+ nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL);
+
+ /* kevent fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "kevent returns with fail\n");
+ break;
+ }
+ /* kevent timeout, will never happen here */
+ else if (nfds == 0)
+ continue;
+
+ /* kevent has at least one fd ready to read */
+ eal_intr_process_interrupts(events, nfds);
+ }
+ close(kq);
+ kq = -1;
+ return NULL;
}
int
rte_eal_intr_init(void)
{
- return 0;
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ kq = kqueue();
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n");
+ return -1;
+ }
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ rte_errno = -ret;
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ }
+
+ return ret;
}
int
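The FreeBSD implementation now accepts real registrations against kqueue-backed sources. A hedged sketch of the consumer side; the fd is assumed to be a readable descriptor owned by the caller, and attach_event is a placeholder name:

	/* Sketch: register a callback for a readable fd, then remove it. */
	static void
	on_event(void *cb_arg)
	{
		RTE_LOG(INFO, USER1, "event for device %p\n", cb_arg);
	}

	static int
	attach_event(int fd, void *dev)
	{
		struct rte_intr_handle ih;

		memset(&ih, 0, sizeof(ih));
		ih.fd = fd;
		ih.type = RTE_INTR_HANDLE_EXT;

		if (rte_intr_callback_register(&ih, on_event, dev) < 0)
			return -1;
		/* later: (void *)-1 removes every callback matching on_event */
		return rte_intr_callback_unregister(&ih, on_event, (void *)-1);
	}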
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index a5e03478..16d2bc7c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -12,6 +12,7 @@
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
@@ -104,6 +105,8 @@ rte_eal_hugepage_init(void)
/* map all hugepages and sort them */
for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
struct hugepage_info *hpi;
+ rte_iova_t prev_end = 0;
+ int prev_ms_idx = -1;
uint64_t page_sz, mem_needed;
unsigned int n_pages, max_pages;
@@ -124,10 +127,27 @@ rte_eal_hugepage_init(void)
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
+ bool is_adjacent;
+
+ /* first, check if this segment is IOVA-adjacent to
+ * the previous one.
+ */
+ snprintf(physaddr_str, sizeof(physaddr_str),
+ "hw.contigmem.physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr,
+ &sysctl_size, NULL, 0);
+ if (error < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
+ "from %s\n", j, hpi->hugedir);
+ return -1;
+ }
+
+ is_adjacent = prev_end != 0 && physaddr == prev_end;
+ prev_end = physaddr + hpi->hugepage_sz;
for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
msl_idx++) {
- bool empty;
+ bool empty, need_hole;
msl = &mcfg->memsegs[msl_idx];
arr = &msl->memseg_arr;
@@ -136,20 +156,23 @@ rte_eal_hugepage_init(void)
empty = arr->count == 0;
- /* we need 1, plus hole if not empty */
+ /* we need a hole if this isn't an empty memseg
+ * list, and if the previous segment was not
+ * adjacent to the current one.
+ */
+ need_hole = !empty && !is_adjacent;
+
+ /* we need 1, plus hole if not adjacent */
ms_idx = rte_fbarray_find_next_n_free(arr,
- 0, 1 + (empty ? 1 : 0));
+ 0, 1 + (need_hole ? 1 : 0));
/* memseg list is full? */
if (ms_idx < 0)
continue;
- /* leave some space between memsegs, they are
- * not IOVA contiguous, so they shouldn't be VA
- * contiguous either.
- */
- if (!empty)
+ if (need_hole && prev_ms_idx == ms_idx - 1)
ms_idx++;
+ prev_ms_idx = ms_idx;
break;
}
@@ -178,16 +201,6 @@ rte_eal_hugepage_init(void)
return -1;
}
- snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
- ".physaddr.%d", j);
- error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
- NULL, 0);
- if (error < 0) {
- RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
- "from %s\n", j, hpi->hugedir);
- return -1;
- }
-
seg->addr = addr;
seg->iova = physaddr;
seg->hugepage_sz = page_sz;
@@ -200,7 +213,7 @@ rte_eal_hugepage_init(void)
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu\n",
- seg_idx, addr, physaddr, page_sz);
+ seg_idx++, addr, physaddr, page_sz);
total_mem += seg->len;
}
@@ -288,3 +301,217 @@ rte_eal_using_phys_addrs(void)
{
return 0;
}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+
+static int
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* FreeBSD has an issue where a core dump will dump the entire memory
+ * contents, including anonymous zero-page memory. Therefore, while we
+ * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
+ * also be further limiting total memory amount to whatever memory is
+ * available to us through contigmem driver (plus spacing blocks).
+ *
+ * so, at each stage, we will be checking how much memory we are
+ * preallocating and adjusting all the values accordingly.
+ */
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ uint64_t avail_mem;
+ int type_msl_idx, max_segs, avail_segs, total_segs = 0;
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* no NUMA support on FreeBSD */
+
+ /* check if we've already exceeded total memory amount */
+ if (total_mem >= max_mem)
+ break;
+
+ /* first, calculate theoretical limits according to config */
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ /* now, limit all of that to whatever will actually be
+ * available to us, because without dynamic allocation support,
+ * all of that extra memory will be sitting there being useless
+ * and slowing down core dumps in case of a crash.
+ *
+ * we need (N*2)-1 segments because we cannot guarantee that
+ * each segment will be IOVA-contiguous with the previous one,
+ * so we will allocate more and put spaces in between segments
+ * that are non-contiguous.
+ */
+ avail_segs = (hpi->num_pages[0] * 2) - 1;
+ avail_mem = avail_segs * hugepage_sz;
+
+ max_type_mem = RTE_MIN(avail_mem, max_type_mem);
+ max_segs = RTE_MIN(avail_segs, max_segs);
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ 0, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ memseg_primary_init() :
+ memseg_secondary_init();
+}
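Once rte_eal_memseg_init() has run, the preallocated lists are reachable through the generic memseg API. A sketch using the experimental rte_memseg_walk() helper (assumed available via rte_memory.h) to count mapped segments:

	/* Sketch: count mapped segments after EAL init. */
	static int
	count_seg(const struct rte_memseg_list *msl __rte_unused,
			const struct rte_memseg *ms __rte_unused, void *arg)
	{
		unsigned int *count = arg;

		(*count)++;
		return 0; /* returning non-zero would stop the walk */
	}

	static unsigned int
	n_memsegs(void)
	{
		unsigned int count = 0;

		rte_memseg_walk(count_seg, &count);
		return count;
	}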
diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build
index 47e16a64..3945b529 100644
--- a/lib/librte_eal/bsdapp/eal/meson.build
+++ b/lib/librte_eal/bsdapp/eal/meson.build
@@ -16,3 +16,5 @@ env_sources = files('eal_alarm.c',
'eal_memory.c',
'eal_dev.c'
)
+
+deps += ['kvargs']
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 48f870f2..cca68826 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h rte_fbarray.h
+INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 00000000..404a9065
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+__rte_experimental void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+__rte_experimental void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
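Classes are expected to register themselves from a constructor, typically via the RTE_REGISTER_CLASS helper declared alongside these symbols in rte_class.h. A minimal sketch; the class name and iterator callback are placeholders:

	/* Sketch: declare, register and look up a device class. */
	#include <rte_class.h>

	static void *
	example_dev_iterate(const void *start __rte_unused,
			const char *str __rte_unused,
			const struct rte_dev_iterator *it __rte_unused)
	{
		return NULL; /* placeholder: no devices to report */
	}

	static struct rte_class rte_class_example = {
		.dev_iterate = example_dev_iterate,
	};

	RTE_REGISTER_CLASS(example, rte_class_example);

	static struct rte_class *
	find_example(void)
	{
		return rte_class_find_by_name("example");
	}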
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b16..678dbcac 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -10,9 +10,12 @@
#include <rte_compat.h>
#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
#include <rte_log.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
@@ -42,17 +45,27 @@ static struct dev_event_cb_list dev_event_cbs;
/* spinlock for device callbacks */
static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
-static int cmp_detached_dev_name(const struct rte_device *dev,
- const void *_name)
-{
- const char *name = _name;
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
- /* skip attached devices */
- if (dev->driver != NULL)
- return 1;
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
- return strcmp(dev->name, name);
-}
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
static int cmp_dev_name(const struct rte_device *dev, const void *_name)
{
@@ -138,8 +151,8 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (da == NULL)
return -ENOMEM;
- ret = rte_devargs_parse(da, "%s:%s,%s",
- busname, devname, devargs);
+ ret = rte_devargs_parsef(da, "%s:%s,%s",
+ busname, devname, devargs);
if (ret)
goto err_devarg;
@@ -151,14 +164,19 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
devname);
ret = -ENODEV;
goto err_devarg;
}
+ if (dev->driver != NULL) {
+ RTE_LOG(ERR, EAL, "Device is already plugged\n");
+ return -EEXIST;
+ }
+
ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
@@ -200,6 +218,11 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}
+ if (dev->driver == NULL) {
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n");
+ return -ENOENT;
+ }
+
ret = bus->unplug(dev);
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
@@ -343,3 +366,201 @@ dev_callback_process(char *device_name, enum rte_dev_event_type event)
}
rte_spinlock_unlock(&dev_event_lock);
}
+
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal;
+ * mark this iterator as invalid unless
+ * everything below goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
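A worked example of what dev_str_sane_copy() produces (input strings hypothetical), derived from the code above:

/* dev_str_sane_copy("bus=pci,addr=00:02.0/class=eth")
 *     -> "addr=00:02.0"
 * dev_str_sane_copy("bus=pci/class=eth")
 *     -> ""  (no ',' in the first layer)
 *
 * i.e. the leading key up to and including the first ',' is skipped,
 * and the copy is truncated at the '/' layer separator; the caller
 * gets a heap copy of the first layer's properties only.
 */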
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
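A minimal usage sketch for the new iterator pair (not part of the patch; it assumes the named bus and class both implement dev_iterate):

#include <stdio.h>
#include <rte_dev.h>

static void
list_matching_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	if (rte_dev_iterator_init(&it, "bus=vdev/class=eth") != 0)
		return; /* rte_errno describes the failure */
	while ((dev = rte_dev_iterator_next(&it)) != NULL)
		printf("matched device %s\n", dev->name);
}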
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index b0434158..dac2402a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -13,9 +13,14 @@
#include <string.h>
#include <stdarg.h>
+#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_compat.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
@@ -56,30 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
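Worked examples (inputs hypothetical): layers are separated by '/', so the count is one more than the number of separators, and zero for a NULL string.

/* devargs_layer_count(NULL)                        -> 0
 * devargs_layer_count("bus=pci")                   -> 1
 * devargs_layer_count("bus=pci/class=eth")         -> 2
 * devargs_layer_count("bus=pci/class=eth/driver=x")-> 3
 */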
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split the string into layer sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs already points to the devstr
+ * as its source data, it should not allocate
+ * anything and must keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, split the layer
+ * strings with NUL terminators, to ease
+ * parsing them afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
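To make the accepted format concrete, here is how a full three-layer string (all names hypothetical) would be split by the parser above:

/* "bus=pci,addr=00:02.0/class=eth/driver=net_foo,key=val"
 *
 *   bus_str -> "bus=pci,addr=00:02.0"    (devargs->bus set from "pci")
 *   cls_str -> "class=eth"               (devargs->cls set from "eth")
 *   drv_str -> "driver=net_foo,key=val"  (free-form driver layer)
 *
 * When the parser owns the data, each '/' is then replaced with '\0'
 * so the three sub-strings can be consumed independently later.
 */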
static int
bus_name_cmp(const struct rte_bus *bus, const void *name)
{
return strncmp(bus->name, name, strlen(bus->name));
}
-int __rte_experimental
-rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
{
struct rte_bus *bus = NULL;
- va_list ap;
- va_start(ap, format);
- char dev[vsnprintf(NULL, 0, format, ap) + 1];
const char *devname;
const size_t maxlen = sizeof(da->name);
size_t i;
- va_end(ap);
if (da == NULL)
return -EINVAL;
- va_start(ap, format);
- vsnprintf(dev, sizeof(dev), format, ap);
- va_end(ap);
/* Retrieve eventual bus info */
do {
devname = dev;
@@ -96,7 +235,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
da->name[i] = devname[i];
i++;
if (i == maxlen) {
- fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
dev, maxlen);
da->name[i - 1] = '\0';
return -EINVAL;
@@ -106,7 +245,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
if (bus == NULL) {
bus = rte_bus_find_by_device_name(da->name);
if (bus == NULL) {
- fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
da->name);
return -EFAULT;
}
@@ -118,12 +257,40 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
else
da->args = strdup("");
if (da->args == NULL) {
- fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
return -ENOMEM;
}
return 0;
}
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+ int ret;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ ret = rte_devargs_parse(da, dev);
+
+ free(dev);
+ return ret;
+}
+
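Usage sketch (bus/device names are assumptions): the formatted variant builds the device string on the heap, then delegates to rte_devargs_parse().

#include <string.h>
#include <rte_devargs.h>

static void
parsef_demo(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_devargs_parsef(&da, "%s:%s,%s",
			"pci", "0000:00:02.0", "key=value") == 0) {
		/* da.name holds the device name, da.args the trailing
		 * arguments (heap-allocated), and da.bus the matched bus. */
	}
}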
int __rte_experimental
rte_devargs_insert(struct rte_devargs *da)
{
@@ -150,7 +317,7 @@ rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
if (devargs == NULL)
goto fail;
- if (rte_devargs_parse(devargs, "%s", dev))
+ if (rte_devargs_parse(devargs, dev))
goto fail;
devargs->type = devtype;
bus = devargs->bus;
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index 019f84c1..43caf3ce 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -231,7 +231,7 @@ find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
return MASK_GET_IDX(msk_idx, run_start);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -287,7 +287,7 @@ find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
return MASK_GET_IDX(idx, found);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -353,6 +353,277 @@ find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
}
static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
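A quick worked example of the ignore-mask arithmetic used above, assuming MASK_ALIGN == 64:

/* start = 70:
 *   first      = 70 / 64 = 1
 *   first_mod  = 70 % 64 = 6
 *   ignore_msk = ~(-1ULL << (6 + 1)) = 0x7f
 *
 * Only bits 0..6 of mask word 1 (overall indices 64..70) survive the
 * first iteration, so no run beyond the starting index can be
 * reported by the clz/ctz-based scan.
 */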
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find last set bit - that will correspond to whatever it is
+ * that we're looking for. we're counting trailing zeroes, thus
+ * the value we get is counted from end of mask, so calculate
+ * position from start of mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero */
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
struct used_mask *msk;
@@ -434,39 +705,52 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
if (data == NULL)
goto fail;
- eal_get_fbarray_path(path, sizeof(path), name);
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
- /*
- * Each fbarray is unique to process namespace, i.e. the filename
- * depends on process prefix. Try to take out a lock and see if we
- * succeed. If we don't, someone else is using it already.
- */
- fd = open(path, O_CREAT | O_RDWR, 0600);
- if (fd < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = errno;
- goto fail;
- } else if (flock(fd, LOCK_EX | LOCK_NB)) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = EBUSY;
- goto fail;
- }
+ /*
+ * Each fbarray is unique to process namespace, i.e. the
+ * filename depends on process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
- /* take out a non-exclusive lock, so that other processes could still
- * attach to it, but no other process could reinitialize it.
- */
- if (flock(fd, LOCK_SH | LOCK_NB)) {
- rte_errno = errno;
- goto fail;
- }
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
- if (resize_and_map(fd, data, mmap_len))
- goto fail;
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
- /* we've mmap'ed the file, we can now close the fd */
- close(fd);
+ /* we've mmap'ed the file, we can now close the fd */
+ close(fd);
+ }
/* initialize the data */
memset(data, 0, mmap_len);
@@ -675,8 +959,8 @@ rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
return ret;
}
-int __rte_experimental
-rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
int ret = -1;
@@ -688,36 +972,106 @@ rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
}
-
- ret = find_next(arr, start, false);
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int __rte_experimental
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
+ return fbarray_find(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
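An illustrative (made-up) use of the new forward/backward lookups:

#include <stdint.h>
#include <rte_fbarray.h>

static void
fbarray_lookup_demo(void)
{
	struct rte_fbarray arr;

	if (rte_fbarray_init(&arr, "demo", 256, sizeof(uint64_t)) != 0)
		return;
	rte_fbarray_set_used(&arr, 42);

	rte_fbarray_find_next_used(&arr, 0);   /* returns 42 */
	rte_fbarray_find_prev_used(&arr, 255); /* returns 42 */
	rte_fbarray_find_prev_free(&arr, 41);  /* returns 41 */

	rte_fbarray_destroy(&arr);
}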
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
int ret = -1;
- if (arr == NULL || start >= arr->len) {
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
rte_errno = EINVAL;
return -1;
}
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->count == 0) {
- rte_errno = ENOENT;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
}
- ret = find_next(arr, start, true);
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
@@ -727,54 +1081,33 @@ int __rte_experimental
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->len == arr->count || arr->len - arr->count < n) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- ret = find_next_n(arr, start, n, false);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+ return fbarray_find_n(arr, start, n, true, false);
}
int __rte_experimental
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->count < n) {
- rte_errno = ENOENT;
- goto out;
- }
+ return fbarray_find_n(arr, start, n, true, true);
+}
- ret = find_next_n(arr, start, n, true);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
}
int __rte_experimental
-rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
{
int ret = -1;
@@ -786,39 +1119,66 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- if (arr->count == 0) {
- ret = arr->len - start;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
}
- ret = find_contig(arr, start, false);
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
-rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
+ return fbarray_find_contig(arr, start, true, false);
+}
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
- ret = find_contig(arr, start, true);
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
}
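The contiguous-run variants behave analogously; the rev_contig flavor counts backwards from, and including, the start index. For example (hypothetical array state), with entries 10..19 marked used:

/* rte_fbarray_find_contig_used(&arr, 10)     -> 10  (run 10..19)
 * rte_fbarray_find_rev_contig_used(&arr, 19) -> 10  (run 19..10)
 * rte_fbarray_find_rev_contig_used(&arr, 14) ->  5  (run 14..10)
 */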
int __rte_experimental
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 81811894..c714a4bd 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -335,9 +335,7 @@ static const struct logtype logtype_strings[] = {
};
/* Logging should be first initializer (before drivers and bus) */
-RTE_INIT_PRIO(rte_log_init, LOG);
-static void
-rte_log_init(void)
+RTE_INIT_PRIO(rte_log_init, LOG)
{
uint32_t i;
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 4f0688f9..fbfb1b05 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -34,7 +34,7 @@
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
-static uint64_t baseaddr_offset;
+static void *next_baseaddr;
static uint64_t system_page_sz;
void *
@@ -56,21 +56,27 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
- if (requested_addr == NULL && internal_config.base_virtaddr != 0) {
- requested_addr = (void *) (internal_config.base_virtaddr +
- (size_t)baseaddr_offset);
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
addr_is_hint = true;
}
- /* if requested address is not aligned by page size, or if requested
- * address is NULL, add page size to requested length as we may get an
- * address that's aligned by system page size, which can be smaller than
- * our requested page size. additionally, we shouldn't try to align if
- * system page size is the same as requested page size.
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system page size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
*/
no_align = (requested_addr != NULL &&
- ((uintptr_t)requested_addr & (page_sz - 1)) == 0) ||
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
page_sz == system_page_sz;
do {
@@ -116,6 +122,8 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
requested_addr, aligned_addr);
RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
}
RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
@@ -148,387 +156,9 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
munmap(aligned_end, after_len);
}
- baseaddr_offset += *size;
-
return aligned_addr;
}
-static uint64_t
-get_mem_amount(uint64_t page_sz, uint64_t max_mem)
-{
- uint64_t area_sz, max_pages;
-
- /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
- max_pages = RTE_MAX_MEMSEG_PER_LIST;
- max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
-
- area_sz = RTE_MIN(page_sz * max_pages, max_mem);
-
- /* make sure the list isn't smaller than the page size */
- area_sz = RTE_MAX(area_sz, page_sz);
-
- return RTE_ALIGN(area_sz, page_sz);
-}
-
-static int
-free_memseg_list(struct rte_memseg_list *msl)
-{
- if (rte_fbarray_destroy(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
- return -1;
- }
- memset(msl, 0, sizeof(*msl));
- return 0;
-}
-
-static int
-alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
- uint64_t max_mem, int socket_id, int type_msl_idx)
-{
- char name[RTE_FBARRAY_NAME_LEN];
- uint64_t mem_amount;
- int max_segs;
-
- mem_amount = get_mem_amount(page_sz, max_mem);
- max_segs = mem_amount / page_sz;
-
- snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
- type_msl_idx);
- if (rte_fbarray_init(&msl->memseg_arr, name, max_segs,
- sizeof(struct rte_memseg))) {
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
- rte_strerror(rte_errno));
- return -1;
- }
-
- msl->page_sz = page_sz;
- msl->socket_id = socket_id;
- msl->base_va = NULL;
-
- RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
- (size_t)page_sz >> 10, socket_id);
-
- return 0;
-}
-
-static int
-alloc_va_space(struct rte_memseg_list *msl)
-{
- uint64_t page_sz;
- size_t mem_sz;
- void *addr;
- int flags = 0;
-
-#ifdef RTE_ARCH_PPC_64
- flags |= MAP_HUGETLB;
-#endif
-
- page_sz = msl->page_sz;
- mem_sz = page_sz * msl->memseg_arr.len;
-
- addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
- if (addr == NULL) {
- if (rte_errno == EADDRNOTAVAIL)
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
- (unsigned long long)mem_sz, msl->base_va);
- else
- RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
- return -1;
- }
- msl->base_va = addr;
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init_32(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int active_sockets, hpi_idx, msl_idx = 0;
- unsigned int socket_id, i;
- struct rte_memseg_list *msl;
- uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
- uint64_t max_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- /* this is a giant hack, but desperate times call for desperate
- * measures. in legacy 32-bit mode, we cannot preallocate VA space,
- * because having upwards of 2 gigabytes of VA space already mapped will
- * interfere with our ability to map and sort hugepages.
- *
- * therefore, in legacy 32-bit mode, we will be initializing memseg
- * lists much later - in eal_memory.c, right after we unmap all the
- * unneeded pages. this will not affect secondary processes, as those
- * should be able to mmap the space without (too many) problems.
- */
- if (internal_config.legacy_mem)
- return 0;
-
- /* 32-bit mode is a very special case. we cannot know in advance where
- * the user will want to allocate their memory, so we have to do some
- * heuristics.
- */
- active_sockets = 0;
- total_requested_mem = 0;
- if (internal_config.force_sockets)
- for (i = 0; i < rte_socket_count(); i++) {
- uint64_t mem;
-
- socket_id = rte_socket_id_by_idx(i);
- mem = internal_config.socket_mem[socket_id];
-
- if (mem == 0)
- continue;
-
- active_sockets++;
- total_requested_mem += mem;
- }
- else
- total_requested_mem = internal_config.memory;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- if (total_requested_mem > max_mem) {
- RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
- (unsigned int)(max_mem >> 20));
- return -1;
- }
- total_extra_mem = max_mem - total_requested_mem;
- extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
- total_extra_mem / active_sockets;
-
- /* the allocation logic is a little bit convoluted, but here's how it
- * works, in a nutshell:
- * - if user hasn't specified on which sockets to allocate memory via
- * --socket-mem, we allocate all of our memory on master core socket.
- * - if user has specified sockets to allocate memory on, there may be
- * some "unused" memory left (e.g. if user has specified --socket-mem
- * such that not all memory adds up to 2 gigabytes), so add it to all
- * sockets that are in use equally.
- *
- * page sizes are sorted by size in descending order, so we can safely
- * assume that we dispense with bigger page sizes first.
- */
-
- /* create memseg lists */
- for (i = 0; i < rte_socket_count(); i++) {
- int hp_sizes = (int) internal_config.num_hugepage_sizes;
- uint64_t max_socket_mem, cur_socket_mem;
- unsigned int master_lcore_socket;
- struct rte_config *cfg = rte_eal_get_configuration();
- bool skip;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- /* if we didn't specifically request memory on this socket */
- skip = active_sockets != 0 &&
- internal_config.socket_mem[socket_id] == 0;
- /* ...or if we didn't specifically request memory on *any*
- * socket, and this is not master lcore
- */
- master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
- skip |= active_sockets == 0 && socket_id != master_lcore_socket;
-
- if (skip) {
- RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
- socket_id);
- continue;
- }
-
- /* max amount of memory on this socket */
- max_socket_mem = (active_sockets != 0 ?
- internal_config.socket_mem[socket_id] :
- internal_config.memory) +
- extra_mem_per_socket;
- cur_socket_mem = 0;
-
- for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
- uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
- uint64_t hugepage_sz;
- struct hugepage_info *hpi;
- int type_msl_idx, max_segs, total_segs = 0;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- /* check if pages are actually available */
- if (hpi->num_pages[socket_id] == 0)
- continue;
-
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
- max_pagesz_mem = max_socket_mem - cur_socket_mem;
-
- /* make it multiple of page size */
- max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
- hugepage_sz);
-
- RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
- "%" PRIu64 "M on socket %i\n",
- max_pagesz_mem >> 20, socket_id);
-
- type_msl_idx = 0;
- while (cur_pagesz_mem < max_pagesz_mem &&
- total_segs < max_segs) {
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx];
-
- if (alloc_memseg_list(msl, hugepage_sz,
- max_pagesz_mem, socket_id,
- type_msl_idx)) {
- /* failing to allocate a memseg list is
- * a serious error.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
- return -1;
- }
-
- if (alloc_va_space(msl)) {
- /* if we couldn't allocate VA space, we
- * can try with smaller page sizes.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
- /* deallocate memseg list */
- if (free_memseg_list(msl))
- return -1;
- break;
- }
-
- total_segs += msl->memseg_arr.len;
- cur_pagesz_mem = total_segs * hugepage_sz;
- type_msl_idx++;
- msl_idx++;
- }
- cur_socket_mem += cur_pagesz_mem;
- }
- if (cur_socket_mem == 0) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
- socket_id);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, socket_id, hpi_idx, msl_idx = 0;
- struct rte_memseg_list *msl;
- uint64_t max_mem, total_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- total_mem = 0;
-
- /* create memseg lists */
- for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
- hpi_idx++) {
- struct hugepage_info *hpi;
- uint64_t hugepage_sz;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- for (i = 0; i < (int) rte_socket_count(); i++) {
- uint64_t max_type_mem, total_type_mem = 0;
- int type_msl_idx, max_segs, total_segs = 0;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- if (total_mem >= max_mem)
- break;
-
- max_type_mem = RTE_MIN(max_mem - total_mem,
- (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
-
- type_msl_idx = 0;
- while (total_type_mem < max_type_mem &&
- total_segs < max_segs) {
- uint64_t cur_max_mem;
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx++];
-
- cur_max_mem = max_type_mem - total_type_mem;
- if (alloc_memseg_list(msl, hugepage_sz,
- cur_max_mem, socket_id,
- type_msl_idx))
- return -1;
-
- total_segs += msl->memseg_arr.len;
- total_type_mem = total_segs * hugepage_sz;
- type_msl_idx++;
-
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
- return -1;
- }
- }
- total_mem += total_type_mem;
- }
- }
- return 0;
-}
-
-static int
-memseg_secondary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int msl_idx = 0;
- struct rte_memseg_list *msl;
-
- for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
-
- msl = &mcfg->memsegs[msl_idx];
-
- /* skip empty memseg lists */
- if (msl->memseg_arr.len == 0)
- continue;
-
- if (rte_fbarray_attach(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
- return -1;
- }
-
- /* preallocate VA space */
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
- return -1;
- }
- }
-
- return 0;
-}
-
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
@@ -536,6 +166,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
void *start, *end;
int ms_idx;
+ if (msl == NULL)
+ return NULL;
+
/* a memseg list was specified, check if it's the right one */
start = msl->base_va;
end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
@@ -788,14 +421,11 @@ rte_mem_lock_page(const void *virt)
}
int __rte_experimental
-rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
- /* do not allow allocations/frees/init while we iterate */
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
const struct rte_memseg *ms;
@@ -820,30 +450,34 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
len = n_segs * msl->page_sz;
ret = func(msl, ms, len, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr,
ms_idx + n_segs);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ms_idx, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -859,29 +493,33 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
while (ms_idx >= 0) {
ms = rte_fbarray_get(arr, ms_idx);
ret = func(msl, ms, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -890,17 +528,23 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
continue;
ret = func(msl, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
- if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
}
-out:
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
return ret;
}
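A usage sketch of the walk API (not from the patch): a non-zero return from the callback stops the walk and is propagated to the caller, which is exactly what the simplified `if (ret) return ret;` above preserves.

static int
count_segs(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms __rte_unused, void *arg)
{
	int *count = arg;

	(*count)++;
	return 0; /* 0: keep walking; >0: stop; <0: stop with error */
}

static int
total_seg_count(void)
{
	int n = 0;

	rte_memseg_walk(count_segs, &n);
	return n;
}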
@@ -918,15 +562,7 @@ rte_eal_memory_init(void)
/* lock mem hotplug here, to prevent races while we init */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
-#ifndef RTE_ARCH_64
- memseg_primary_init_32() :
-#else
- memseg_primary_init() :
-#endif
- memseg_secondary_init();
-
- if (retval < 0)
+ if (rte_eal_memseg_init() < 0)
goto fail;
if (eal_memalloc_init() < 0)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index faa3b061..7300fe05 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -52,38 +52,6 @@ memzone_lookup_thread_unsafe(const char *name)
return NULL;
}
-
-/* This function will return the greatest free block if a heap has been
- * specified. If no heap has been specified, it will return the heap and
- * length of the greatest free block available in all heaps */
-static size_t
-find_heap_max_free_elem(int *s, unsigned align)
-{
- struct rte_mem_config *mcfg;
- struct rte_malloc_socket_stats stats;
- int i, socket = *s;
- size_t len = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if ((socket != SOCKET_ID_ANY) && (socket != i))
- continue;
-
- malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
- if (stats.greatest_free_size > len) {
- len = stats.greatest_free_size;
- *s = i;
- }
- }
-
- if (len < MALLOC_ELEM_OVERHEAD + align)
- return 0;
-
- return len - MALLOC_ELEM_OVERHEAD - align;
-}
-
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
int socket_id, unsigned int flags, unsigned int align,
@@ -92,6 +60,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
struct rte_fbarray *arr;
+ void *mz_addr;
size_t requested_len;
int mz_idx;
bool contig;
@@ -140,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- len += RTE_CACHE_LINE_MASK;
- len &= ~((size_t) RTE_CACHE_LINE_MASK);
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
/* save minimal requested length */
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
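For reference, RTE_ALIGN_CEIL rounds up to the next multiple of its alignment, e.g. RTE_ALIGN_CEIL(100, 64) == 128 and RTE_ALIGN_CEIL(128, 64) == 128; the result is identical to the mask arithmetic it replaces, only more readable.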
@@ -165,27 +133,18 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
/* malloc only cares about size flags, remove contig flag from flags */
flags &= ~RTE_MEMZONE_IOVA_CONTIG;
- if (len == 0) {
- /* len == 0 is only allowed for non-contiguous zones */
- if (contig) {
- RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n");
- rte_errno = EINVAL;
- return NULL;
- }
- if (bound != 0)
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
requested_len = bound;
- else {
- requested_len = find_heap_max_free_elem(&socket_id, align);
- if (requested_len == 0) {
- rte_errno = ENOMEM;
- return NULL;
- }
- }
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
}
-
- /* allocate memory on heap */
- void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
- align, bound, contig);
if (mz_addr == NULL) {
rte_errno = ENOMEM;
return NULL;
@@ -213,8 +172,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
- mz->len = (requested_len == 0 ?
- (elem->size - MALLOC_ELEM_OVERHEAD) : requested_len);
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
mz->hugepage_sz = elem->msl->page_sz;
mz->socket_id = elem->msl->socket_id;
mz->flags = 0;
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index ecebb292..dd5f9740 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -66,10 +66,12 @@ eal_long_options[] = {
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
{OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
{OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
{OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
@@ -179,6 +181,10 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
/* zero out hugedir descriptors */
for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
memset(&internal_cfg->hugepage_info[i], 0,
@@ -315,6 +321,7 @@ eal_parse_service_coremask(const char *coremask)
unsigned int count = 0;
char c;
int val;
+ uint32_t taken_lcore_count = 0;
if (coremask == NULL)
return -1;
@@ -348,7 +355,7 @@ eal_parse_service_coremask(const char *coremask)
if (master_lcore_parsed &&
cfg->master_lcore == lcore) {
RTE_LOG(ERR, EAL,
- "Error: lcore %u is master lcore, cannot use as service core\n",
+ "lcore %u is master lcore, cannot use as service core\n",
idx);
return -1;
}
@@ -358,6 +365,10 @@ eal_parse_service_coremask(const char *coremask)
"lcore %u unavailable\n", idx);
return -1;
}
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role = ROLE_SERVICE;
count++;
}
@@ -374,11 +385,28 @@ eal_parse_service_coremask(const char *coremask)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
cfg->service_lcore_count = count;
return 0;
}
static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -387,6 +415,11 @@ eal_parse_coremask(const char *coremask)
char c;
int val;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -c is before -s or -S\n");
+
if (coremask == NULL)
return -1;
/* Remove all blank characters ahead and after .
@@ -418,6 +451,7 @@ eal_parse_coremask(const char *coremask)
"unavailable\n", idx);
return -1;
}
+
cfg->lcore_role[idx] = ROLE_RTE;
lcore_config[idx].core_index = count;
count++;
@@ -449,6 +483,7 @@ eal_parse_service_corelist(const char *corelist)
unsigned count = 0;
char *end = NULL;
int min, max;
+ uint32_t taken_lcore_count = 0;
if (corelist == NULL)
return -1;
@@ -490,6 +525,9 @@ eal_parse_service_corelist(const char *corelist)
idx);
return -1;
}
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role =
ROLE_SERVICE;
count++;
@@ -504,6 +542,12 @@ eal_parse_service_corelist(const char *corelist)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
return 0;
}
@@ -516,6 +560,11 @@ eal_parse_corelist(const char *corelist)
char *end = NULL;
int min, max;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -l is before -s or -S\n");
+
if (corelist == NULL)
return -1;
@@ -590,7 +639,8 @@ eal_parse_master_lcore(const char *arg)
/* ensure master core is not used as service core */
if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
- RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
return -1;
}
@@ -1165,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_shconf = 1;
break;
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
case OPT_PROC_TYPE_NUM:
conf->process_type = eal_parse_proc_type(optarg);
break;
@@ -1316,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-
- if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_cfg->force_socket_limits && internal_cfg->legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with neither --"OPT_IN_MEMORY" nor "
+ "--"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
return 0;
}
@@ -1370,6 +1438,8 @@ eal_common_usage(void)
" Set specific log level\n"
" -v Display version information on startup\n"
" -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 707d8ab3..9fcb9121 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -20,6 +20,7 @@
#include <sys/un.h>
#include <unistd.h>
+#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
@@ -94,11 +95,9 @@ TAILQ_HEAD(pending_request_list, pending_request);
static struct {
struct pending_request_list requests;
pthread_mutex_t lock;
- pthread_cond_t async_cond;
} pending_requests = {
.requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
.lock = PTHREAD_MUTEX_INITIALIZER,
- .async_cond = PTHREAD_COND_INITIALIZER
/**< used in async requests only */
};
@@ -106,6 +105,16 @@ static struct {
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
static struct pending_request *
find_pending_request(const char *dst, const char *act_name)
@@ -290,6 +299,8 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
pthread_mutex_lock(&pending_requests.lock);
pending_req = find_pending_request(s->sun_path, msg->name);
if (pending_req) {
@@ -301,11 +312,14 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
if (pending_req->type == REQUEST_TYPE_SYNC)
pthread_cond_signal(&pending_req->sync.cond);
else if (pending_req->type == REQUEST_TYPE_ASYNC)
- pthread_cond_signal(
- &pending_requests.async_cond);
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
return;
}
@@ -365,7 +379,6 @@ timespec_cmp(const struct timespec *a, const struct timespec *b)
}
enum async_action {
- ACTION_NONE, /**< don't do anything */
ACTION_FREE, /**< free the action entry, but don't trigger callback */
ACTION_TRIGGER /**< trigger callback, then free action entry */
};
@@ -375,7 +388,7 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
{
struct async_request_param *param;
struct rte_mp_reply *reply;
- bool timeout, received, last_msg;
+ bool timeout, last_msg;
param = sr->async.param;
reply = &param->user_reply;
@@ -383,13 +396,6 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
/* did we timeout? */
timeout = timespec_cmp(&param->end, now) <= 0;
- /* did we receive a response? */
- received = sr->reply_received != 0;
-
- /* if we didn't time out, and we didn't receive a response, ignore */
- if (!timeout && !received)
- return ACTION_NONE;
-
/* if we received a response, adjust relevant data and copy message. */
if (sr->reply_received == 1 && sr->reply) {
struct rte_mp_msg *msg, *user_msgs, *tmp;
@@ -448,118 +454,58 @@ trigger_async_action(struct pending_request *sr)
free(sr->async.param->user_reply.msgs);
free(sr->async.param);
free(sr->request);
+ free(sr);
}
static struct pending_request *
-check_trigger(struct timespec *ts)
+async_reply_handle_thread_unsafe(void *arg)
{
- struct pending_request *next, *cur, *trigger = NULL;
-
- TAILQ_FOREACH_SAFE(cur, &pending_requests.requests, next, next) {
- enum async_action action;
- if (cur->type != REQUEST_TYPE_ASYNC)
- continue;
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
- action = process_async_request(cur, ts);
- if (action == ACTION_FREE) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- free(cur);
- } else if (action == ACTION_TRIGGER) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- trigger = cur;
- break;
- }
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
}
- return trigger;
-}
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
-static void
-wait_for_async_messages(void)
-{
- struct pending_request *sr;
- struct timespec timeout;
- bool timedwait = false;
- bool nowait = false;
- int ret;
+ action = process_async_request(req, &ts_now);
- /* scan through the list and see if there are any timeouts that
- * are earlier than our current timeout.
- */
- TAILQ_FOREACH(sr, &pending_requests.requests, next) {
- if (sr->type != REQUEST_TYPE_ASYNC)
- continue;
- if (!timedwait || timespec_cmp(&sr->async.param->end,
- &timeout) < 0) {
- memcpy(&timeout, &sr->async.param->end,
- sizeof(timeout));
- timedwait = true;
- }
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
- /* sometimes, we don't even wait */
- if (sr->reply_received) {
- nowait = true;
- break;
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
}
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
}
- if (nowait)
- return;
-
- do {
- ret = timedwait ?
- pthread_cond_timedwait(
- &pending_requests.async_cond,
- &pending_requests.lock,
- &timeout) :
- pthread_cond_wait(
- &pending_requests.async_cond,
- &pending_requests.lock);
- } while (ret != 0 && ret != ETIMEDOUT);
-
- /* we've been woken up or timed out */
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
}
-static void *
-async_reply_handle(void *arg __rte_unused)
+static void
+async_reply_handle(void *arg)
{
- struct timeval now;
- struct timespec ts_now;
- while (1) {
- struct pending_request *trigger = NULL;
-
- pthread_mutex_lock(&pending_requests.lock);
+ struct pending_request *req;
- /* we exit this function holding the lock */
- wait_for_async_messages();
-
- if (gettimeofday(&now, NULL) < 0) {
- pthread_mutex_unlock(&pending_requests.lock);
- RTE_LOG(ERR, EAL, "Cannot get current time\n");
- break;
- }
- ts_now.tv_nsec = now.tv_usec * 1000;
- ts_now.tv_sec = now.tv_sec;
-
- do {
- trigger = check_trigger(&ts_now);
- /* unlock request list */
- pthread_mutex_unlock(&pending_requests.lock);
-
- if (trigger) {
- trigger_async_action(trigger);
- free(trigger);
-
- /* we've triggered a callback, but there may be
- * more, so lock the list and check again.
- */
- pthread_mutex_lock(&pending_requests.lock);
- }
- } while (trigger);
- }
-
- RTE_LOG(ERR, EAL, "ERROR: asynchronous requests disabled\n");
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
- return NULL;
+ if (req != NULL)
+ trigger_async_action(req);
}
static int
@@ -624,7 +570,15 @@ rte_mp_channel_init(void)
{
char path[PATH_MAX];
int dir_fd;
- pthread_t mp_handle_tid, async_reply_handle_tid;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary process support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ return 0;
+ }
/* create filter path */
create_socket_path("*", path, sizeof(path));
@@ -671,17 +625,6 @@ rte_mp_channel_init(void)
return -1;
}
- if (rte_ctrl_thread_create(&async_reply_handle_tid,
- "rte_mp_async", NULL,
- async_reply_handle, NULL) < 0) {
- RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
- strerror(errno));
- close(mp_fd);
- close(dir_fd);
- mp_fd = -1;
- return -1;
- }
-
/* unlock the directory */
flock(dir_fd, LOCK_UN);
close(dir_fd);
@@ -786,7 +729,7 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -853,7 +796,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg)
static int
mp_request_async(const char *dst, struct rte_mp_msg *req,
- struct async_request_param *param)
+ struct async_request_param *param, const struct timespec *ts)
{
struct rte_mp_msg *reply_msg;
struct pending_request *pending_req, *exist;
@@ -898,6 +841,13 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
param->user_reply.nb_sent++;
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ rte_panic("Fix the above shit to properly free all memory\n");
+ }
+
return 0;
fail:
free(pending_req);
@@ -988,6 +938,12 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1020,7 +976,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
closedir(mp_dir);
@@ -1072,6 +1028,12 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1119,7 +1081,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
- ret = mp_request_async(eal_mp_socket_path(), copy, param);
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
/* if we didn't send anything, put dummy request on the queue */
if (ret == 0 && reply->nb_sent == 0) {
@@ -1146,7 +1108,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -1162,7 +1124,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
- if (mp_request_async(path, copy, param))
+ if (mp_request_async(path, copy, param, ts))
ret = -1;
}
/* if we didn't send anything, put dummy request on the queue */
@@ -1171,9 +1133,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dummy_used = true;
}
- /* trigger async request thread wake up */
- pthread_cond_signal(&pending_requests.async_cond);
-
/* finally, unlock the queue */
pthread_mutex_unlock(&pending_requests.lock);
@@ -1213,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return -1;
}
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
return mp_send(msg, peer, MP_REP);
}
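The async reply path now runs from an EAL alarm instead of a dedicated thread, but the user-facing flow is unchanged; a minimal sketch of an asynchronous request (the action name is hypothetical):

#include <stdio.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>

static int
on_reply(const struct rte_mp_msg *request, const struct rte_mp_reply *reply)
{
	/* invoked from the alarm callback once all peers replied or the
	 * timeout expired
	 */
	printf("request %s: %d of %d replies\n", request->name,
		reply->nb_received, reply->nb_sent);
	return 0;
}

static int
send_async_request(void)
{
	struct rte_mp_msg msg;
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};

	memset(&msg, 0, sizeof(msg));
	snprintf(msg.name, sizeof(msg.name), "my_action"); /* hypothetical */

	return rte_mp_request_async(&msg, &ts, on_reply);
}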
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 42398630..48ef4d6d 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -175,7 +175,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
params = malloc(sizeof(*params));
if (!params)
- return -1;
+ return -ENOMEM;
params->start_routine = start_routine;
params->arg = arg;
@@ -185,13 +185,14 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
if (ret != 0) {
free(params);
- return ret;
+ return -ret;
}
if (name != NULL) {
ret = rte_thread_setname(*thread, name);
if (ret < 0)
- goto fail;
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
}
cpu_found = 0;
@@ -227,5 +228,5 @@ fail:
}
pthread_cancel(*thread);
pthread_join(*thread, NULL);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 00000000..1b93c5b3
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
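A round-trip sketch of the new helpers; the UUID string is arbitrary:

#include <stdio.h>
#include <rte_uuid.h>

int
main(void)
{
	rte_uuid_t uu;
	char out[RTE_UUID_STRLEN];

	/* parse a canonical 36-character UUID string */
	if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", uu) != 0)
		return 1;

	/* format it back; out must hold RTE_UUID_STRLEN bytes */
	rte_uuid_unparse(uu, out, sizeof(out));
	printf("%s\n", out);
	return 0;
}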
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 364f38d1..de05febf 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -12,7 +12,6 @@
#define EAL_FILESYSTEM_H
/** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
#include <stdint.h>
#include <limits.h>
@@ -30,17 +29,14 @@ eal_create_runtime_dir(void);
const char *
eal_get_runtime_dir(void);
+#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = "/var/run";
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
return buffer;
}
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index c4cbf3ac..00ee6e06 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -41,11 +41,17 @@ struct internal_config {
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
/** true to try allocating memory on specific sockets */
volatile unsigned force_sockets;
volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
volatile unsigned legacy_mem;
/**< true to enable legacy memory behavior (no dynamic allocation,
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 211ae06a..96e16678 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -45,8 +45,12 @@ enum {
OPT_NO_PCI_NUM,
#define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
#define OPT_SOCKET_MEM "socket-mem"
OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
#define OPT_SYSLOG "syslog"
OPT_SYSLOG_NUM,
#define OPT_VDEV "vdev"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d5..4f809a83 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -47,6 +47,18 @@ void eal_log_set_default(FILE *default_log);
int rte_eal_cpu_init(void);
/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
* Map memory
*
* This function is private to EAL.
@@ -258,4 +270,38 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layer for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a workable copy of devstr, which will be
+ * used by layer descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned-up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 7d4935fc..d9facc64 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
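With the corrected parameter order (n_bits, mem, mem_size), initialization looks like this sketch; n_bits must be a non-zero multiple of 512:

#include <rte_bitmap.h>
#include <rte_malloc.h>

static struct rte_bitmap *
make_bitmap(uint32_t n_bits)
{
	uint32_t sz = rte_bitmap_get_memory_footprint(n_bits);
	/* bitmap memory must be zeroed and cache-line aligned */
	uint8_t *mem = rte_zmalloc("bmp", sz, RTE_CACHE_LINE_SIZE);

	if (mem == NULL)
		return NULL;
	return rte_bitmap_init(n_bits, mem, sz);
}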
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4..b7b5b084 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -211,6 +211,7 @@ struct rte_bus {
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
};
/**
@@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void);
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, BUS); \
-static void businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h
new file mode 100644
index 00000000..276c91e9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic coprocessor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero, this function stops iterating
+ * over further classes. To continue a search, the class returned by a
+ * previous search can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
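A hypothetical class registration using the constructor helper; the iterator is left as a stub:

#include <rte_class.h>

static void *
my_dev_iterate(const void *start, const char *str,
		const struct rte_dev_iterator *it)
{
	(void)start; (void)str; (void)it;
	/* walk this class's device list here; NULL ends the iteration */
	return NULL;
}

static struct rte_class my_class = {
	.dev_iterate = my_dev_iterate,
};

/* runs as a constructor: after bus registration, before PMDs */
RTE_REGISTER_CLASS(myclass, my_class);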
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 434adfd4..069c13ec 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -83,6 +83,7 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_PRIORITY_LOG 101
#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
#define RTE_PRIORITY_LAST 65535
#define RTE_PRIO(prio) \
@@ -112,6 +113,29 @@ static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
RTE_INIT_PRIO(func, LAST)
/**
+ * Run after main() with low priority.
+ *
+ * @param func
+ * Destructor function name.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the last to run.
+ */
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
@@ -294,6 +318,11 @@ rte_combine64ms1b(register uint64_t v)
/*********** Macros to work with powers of 2 ********/
/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
* Returns true if n is a power of 2
* @param n
* Number to check
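A small sketch pairing the new destructor helper with its constructor counterpart:

#include <stdio.h>
#include <rte_common.h>

/* runs before main(), at the default (last) constructor priority */
RTE_INIT(example_setup)
{
	printf("setup\n");
}

/* runs after main(); plain RTE_FINI runs before prioritized destructors */
RTE_FINI(example_teardown)
{
	printf("teardown\n");
}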
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 3879ff3c..b80a8059 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -69,9 +69,7 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
* Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
* RTE_*_RET() macros defined below is compiled in debug mode.
*/
-#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
- defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
- defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#if defined(RTE_LIBRTE_EVENTDEV_DEBUG)
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
@@ -176,6 +174,7 @@ struct rte_device {
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_attach(const char *name, const char *devargs);
/**
@@ -186,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_detach(struct rte_device *dev);
/**
@@ -285,6 +285,103 @@ __attribute__((used)) = str
static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
__attribute__((used)) = str
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching the properties passed as parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching a criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
#ifdef __cplusplus
}
#endif
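A sketch of walking matching devices with the new iterator; the filter string is hypothetical:

#include <stdio.h>
#include <rte_dev.h>

static void
list_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	/* iterate every device on the vdev bus (hypothetical filter) */
	RTE_DEV_FOREACH(dev, "bus=vdev", &it)
		printf("found %s\n", dev->name);
}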
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 58fbd90a..097a4ce7 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,12 +51,19 @@ struct rte_devargs {
enum rte_devtype type;
/** Device policy. */
enum rte_dev_policy policy;
- /** Bus handle for the device. */
- struct rte_bus *bus;
/** Name of the device. */
char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
/** Arguments string as given by user or "" for no argument. */
- char *args;
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
};
/**
@@ -96,6 +103,42 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* in argument. Store which bus will handle the device, its name
* and the eventual device parameters.
*
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
+ * @param dev
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
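A sketch parsing one of the device strings listed above; cleanup of any allocated fields is omitted:

#include <stdio.h>
#include <string.h>

#include <rte_devargs.h>

static int
parse_example(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	/* the bus prefix is optional; without it, each bus tries to
	 * recognize the identifier
	 */
	if (rte_devargs_parse(&da, "vdev:net_ring0") != 0) {
		fprintf(stderr, "cannot parse device string\n");
		return -1;
	}
	printf("bus %s, device %s\n", da.bus->name, da.name);
	return 0;
}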
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
* The device string is built with a printf-like syntax.
*
* The syntax is:
@@ -124,8 +167,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
*/
__rte_experimental
int
-rte_devargs_parse(struct rte_devargs *da,
- const char *format, ...)
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
__attribute__((format(printf, 2, 0)));
/**
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 8de5d69e..e114dcbd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -490,27 +490,13 @@ static inline int rte_gettid(void)
enum rte_iova_mode rte_eal_iova_mode(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get user provided pool ops name for mbuf
*
* @return
* returns user provided pool ops name.
*/
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void);
-
-/**
- * @deprecated
- * Get default pool ops name for mbuf
- *
- * @return
- * returns default pool ops name.
- */
-__rte_deprecated
const char *
-rte_eal_mbuf_default_mempool_ops(void);
+rte_eal_mbuf_user_pool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
index 3e61fffe..5d880551 100644
--- a/lib/librte_eal/common/include/rte_fbarray.h
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -336,6 +336,120 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr,
int __rte_experimental
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
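A sketch using one of the new reverse lookups; assumes an fbarray set up elsewhere:

#include <stdio.h>
#include <rte_fbarray.h>

/* walk used slots from high to low, starting at idx */
static void
dump_used_backwards(struct rte_fbarray *arr, unsigned int idx)
{
	int i = rte_fbarray_find_prev_used(arr, idx);

	while (i >= 0) {
		printf("slot %d in use\n", i);
		if (i == 0)
			break;
		i = rte_fbarray_find_prev_used(arr, (unsigned int)i - 1);
	}
}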
+
/**
* Dump ``rte_fbarray`` metadata.
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index aab9f6fe..c4b7f4cf 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -264,6 +264,60 @@ int __rte_experimental
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
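A sketch of the intended use: calling the unlocked walk from inside a memory event callback, where the hotplug lock is already held (callback registration not shown):

#include <rte_memory.h>

static int
count_seg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	size_t *total = arg;

	(void)msl;
	*total += ms->len;
	return 0; /* returning 0 continues the walk */
}

/* only safe where the memory hotplug lock is already held */
static size_t
total_mem_in_callback(void)
{
	size_t total = 0;

	rte_memseg_walk_thread_unsafe(count_seg, &total);
	return total;
}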
+
+/**
* Dump the physical memory layout to a file.
*
* @note This function read-locks the memory hotplug subsystem, and thus cannot
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index ef370fa6..f478fa9e 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -81,8 +81,12 @@ struct rte_memzone {
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -141,8 +145,12 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -206,8 +214,12 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index aea4d91b..34b41aff 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -162,6 +162,26 @@ int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
int32_t rte_service_runstate_get(uint32_t id);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing on a service lcore.
+ *
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id);
+
+/**
* Enable or disable the check for a service-core being mapped to the service.
* An application can disable the check when it takes the responsibility to run a
* service itself using *rte_service_run_iter_on_app_lcore*.
@@ -363,6 +383,42 @@ int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
*/
int32_t rte_service_attr_reset_all(uint32_t id);
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get an attribute from a service core.
+ *
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid lcore provided
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore);
+
#ifdef __cplusplus
}
#endif
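A sketch combining the two new service APIs: stop a service, wait until it can no longer be running anywhere, then read the per-core loop counter (ids are hypothetical):

#include <inttypes.h>
#include <stdio.h>

#include <rte_pause.h>
#include <rte_service.h>

static void
stop_and_drain(uint32_t service_id, uint32_t lcore_id)
{
	uint64_t loops;

	rte_service_runstate_set(service_id, 0);

	/* wait until the service cannot be executing on any lcore */
	while (rte_service_may_be_active(service_id) == 1)
		rte_pause();

	/* read how many times this service core has looped */
	if (rte_service_lcore_attr_get(lcore_id,
			RTE_SERVICE_LCORE_ATTR_LOOPS, &loops) == 0)
		printf("lcore %u: %" PRIu64 " loops\n", lcore_id, loops);
}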
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index 8dccaefc..9b01abb2 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-RTE_INIT(tailqinitfn_ ##t); \
-static void tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h
new file mode 100644
index 00000000..2c846b5f
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_uuid.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universally Unique Identifier (UUID)
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ * true if the UUID is the all-zeros value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUID's
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ * returns an integer less than, equal to, or greater than zero if UUID a
+ * is less than, equal to, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ * Returns 0 on success, and -1 if the string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ * Size of the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
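The header also supports static UUID tables; a sketch with an arbitrary value:

#include <rte_uuid.h>

/* arbitrary example value, split into the five canonical fields */
static const rte_uuid_t dev_uuid =
	RTE_UUID_INIT(0x12345678, 0x9abc, 0xdef0, 0x1234, 0x56789abcdef0ULL);

static int
is_dev_uuid(const rte_uuid_t other)
{
	return rte_uuid_compare(dev_uuid, other) == 0;
}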
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 6e2a2362..7c6714a2 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 05
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index f90972fa..5ca13fcc 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -179,7 +179,7 @@ rte_vfio_clear_group(int vfio_group_fd);
* 0 if success.
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
@@ -200,7 +200,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
/**
* Parse IOMMU group number for a device
@@ -222,7 +222,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
* 0 for non-existent group or VFIO
* <0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_num(const char *sysfs_base,
const char *dev_addr, int *iommu_group_num);
@@ -236,7 +236,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
* > 0 container fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_container_fd(void);
/**
@@ -252,13 +252,10 @@ rte_vfio_get_container_fd(void);
* > 0 group fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_fd(int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Create a new container for device binding.
*
* @note Any newly allocated DPDK memory will not be mapped into these
@@ -269,13 +266,10 @@ rte_vfio_get_group_fd(int iommu_group_num);
* the container fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_create(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Destroy the container, unbind all vfio groups within it.
*
* @param container_fd
@@ -285,13 +279,10 @@ rte_vfio_container_create(void);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_destroy(int container_fd);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Bind a IOMMU group to a container.
*
* @param container_fd
@@ -304,13 +295,10 @@ rte_vfio_container_destroy(int container_fd);
* group fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Unbind a IOMMU group from a container.
*
* @param container_fd
@@ -323,13 +311,10 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA mapping for devices in a container.
*
* @param container_fd
@@ -348,14 +333,11 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA unmapping for devices in a container.
*
* @param container_fd
@@ -374,7 +356,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
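With these container APIs now stable, a typical multi-container flow looks like this sketch (values hypothetical):

#include <stdint.h>
#include <rte_vfio.h>

static int
map_in_new_container(int iommu_group, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	int container_fd = rte_vfio_container_create();

	if (container_fd < 0)
		return -1;

	/* bind the device's IOMMU group to the new container */
	if (rte_vfio_container_group_bind(container_fd, iommu_group) < 0)
		goto err;

	/* establish a DMA mapping for devices in this container */
	if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0)
		goto err;

	return container_fd;
err:
	rte_vfio_container_destroy(container_fd);
	return -1;
}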
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 9bfe9b9b..e0a8ed15 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -18,10 +18,89 @@
#include <rte_common.h>
#include <rte_spinlock.h>
+#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous.
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
+
/*
* Initialize a general malloc_elem header structure
*/
@@ -386,16 +465,18 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
if (elem->next != NULL && elem->next->state == ELEM_FREE &&
next_elem_is_adjacent(elem)) {
void *erase;
+ size_t erase_len;
/* we will want to erase the trailer and header */
erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->next);
join_elem(elem, elem->next);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
}
/*
@@ -406,9 +487,11 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
prev_elem_is_adjacent(elem)) {
struct malloc_elem *new_elem;
void *erase;
+ size_t erase_len;
/* we will want to erase trailer and header */
erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->prev);
@@ -416,8 +499,8 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
new_elem = elem->prev;
join_elem(new_elem, elem);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
elem = new_elem;
}
@@ -436,7 +519,7 @@ malloc_elem_free(struct malloc_elem *elem)
void *ptr;
size_t data_len;
- ptr = RTE_PTR_ADD(elem, sizeof(*elem));
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
data_len = elem->size - MALLOC_ELEM_OVERHEAD;
elem = malloc_elem_join_adjacent_free(elem);
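
The page walk in malloc_elem_find_max_iova_contig() is easier to see in isolation. Below is a minimal standalone sketch of the same idea, assuming a toy array of per-page IOVAs in place of DPDK's memseg lists; all names here are illustrative, not DPDK API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* iova[i] is the IOVA of the i-th virtually-contiguous page. Return the
 * largest number of consecutive pages whose IOVAs are also contiguous,
 * mirroring the expected_iova check in the hunk above.
 */
static size_t
max_iova_contig_pages(const uint64_t *iova, size_t n_pages, uint64_t page_sz)
{
	size_t cur = 1, max = 1;

	if (n_pages == 0)
		return 0;
	for (size_t i = 1; i < n_pages; i++) {
		if (iova[i] == iova[i - 1] + page_sz)
			cur++;	/* IOVA matched expectation, run continues */
		else
			cur = 1;	/* unexpected IOVA, restart the run */
		if (cur > max)
			max = cur;
	}
	return max;
}

int main(void)
{
	/* pages 0-2 are IOVA-contiguous, page 3 jumps, pages 3-4 contiguous */
	const uint64_t iova[] = { 0x1000, 0x2000, 0x3000, 0x9000, 0xa000 };

	printf("longest run: %zu pages\n",
			max_iova_contig_pages(iova, 5, 0x1000)); /* 3 */
	return 0;
}
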
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 7331af9c..e2bda4c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -179,4 +179,10 @@ malloc_elem_free_list_index(size_t size);
void
malloc_elem_free_list_insert(struct malloc_elem *elem);
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index d6cf3af8..12aaf2d7 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -149,6 +149,52 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
}
/*
+ * Iterates through a heap's free lists to find the free element with the
+ * biggest size that satisfies the requested alignment. Also sets *size to
+ * the size of the element that was found.
+ * Returns NULL on failure, or a pointer to the element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
* Main function to allocate a block of memory from the heap.
* It locks the free list, scans it, and adds a new memseg if the
* scan fails. Once the new memseg is added, it re-scans and should return
@@ -174,6 +220,26 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
return elem == NULL ? NULL : (void *)(&elem[1]);
}
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
/* this function is exposed in malloc_mp.h */
void
rollback_expand_heap(struct rte_memseg **ms, int n_segs,
@@ -575,6 +641,66 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
return NULL;
}
+static void *
+heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
+ align, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
/* this function is exposed in malloc_mp.h */
int
malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
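
find_biggest_element() above is a plain max-scan over the free lists. A self-contained sketch of its non-IOVA-contiguous branch follows; the HDR/TRL constants and the free_elem type are hypothetical stand-ins for MALLOC_ELEM_HEADER_LEN/MALLOC_ELEM_TRAILER_LEN and struct malloc_elem.

#include <stddef.h>
#include <stdint.h>

struct free_elem {
	struct free_elem *next;
	size_t size;	/* total element size, including header and trailer */
	uintptr_t addr;	/* start address of the element */
};

#define HDR 64	/* assumed header length */
#define TRL 64	/* assumed trailer length */

/* round x up to a power-of-two boundary a */
static uintptr_t
align_up(uintptr_t x, uintptr_t a)
{
	return (x + a - 1) & ~(a - 1);
}

/* usable size is the gap between the aligned data start and the data end;
 * elements where alignment pushes the start past the end are skipped
 */
static struct free_elem *
biggest(struct free_elem *head, size_t align, size_t *out_size)
{
	struct free_elem *e, *best = NULL;
	size_t best_sz = 0;

	for (e = head; e != NULL; e = e->next) {
		uintptr_t start = align_up(e->addr + HDR, align);
		uintptr_t end = e->addr + e->size - TRL;

		if (start >= end)
			continue;
		if ((size_t)(end - start) > best_sz) {
			best_sz = end - start;
			best = e;
		}
	}
	*out_size = best_sz;
	return best;
}

int main(void)
{
	static char arena[4096];
	struct free_elem a = { NULL, 1024, (uintptr_t)arena };
	struct free_elem b = { &a, 2048, (uintptr_t)arena + 1024 };
	size_t sz;

	return biggest(&b, 64, &sz) != NULL && sz > 0 ? 0 : 1;
}
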
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index 03b80141..f52cb555 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -29,6 +29,10 @@ void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
size_t align, size_t bound, bool contig);
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
int
malloc_heap_free(struct malloc_elem *elem);
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 8a3dcfee..56005bea 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,6 +8,7 @@ common_objs = []
common_sources = files(
'eal_common_bus.c',
'eal_common_cpuflags.c',
+ 'eal_common_class.c',
'eal_common_devargs.c',
'eal_common_dev.c',
'eal_common_errno.c',
@@ -25,6 +26,7 @@ common_sources = files(
'eal_common_tailqs.c',
'eal_common_thread.c',
'eal_common_timer.c',
+ 'eal_common_uuid.c',
'malloc_elem.c',
'malloc_heap.c',
'malloc_mp.c',
@@ -46,6 +48,7 @@ common_headers = files(
'include/rte_branch_prediction.h',
'include/rte_bus.h',
'include/rte_bitmap.h',
+ 'include/rte_class.h',
'include/rte_common.h',
'include/rte_debug.h',
'include/rte_devargs.h',
@@ -75,6 +78,7 @@ common_headers = files(
'include/rte_string_fns.h',
'include/rte_tailq.h',
'include/rte_time.h',
+ 'include/rte_uuid.h',
'include/rte_version.h')
# special case install the generic headers, since they go in a subdir
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 73507aac..8767c722 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -52,6 +52,7 @@ struct rte_service_spec_impl {
rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
+ uint8_t active_on_lcore[RTE_MAX_LCORE];
} __rte_cache_aligned;
/* the internal values of a service core */
@@ -61,7 +62,7 @@ struct core_state {
uint8_t runstate; /* running or stopped */
uint8_t is_service_core; /* set if core is currently a service core */
- /* extreme statistics */
+ uint64_t loops;
uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
} __rte_cache_aligned;
@@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
static inline int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
{
if (!service_valid(i))
return -EINVAL;
struct rte_service_spec_impl *s = &rte_services[i];
if (s->comp_runstate != RUNSTATE_RUNNING ||
s->app_runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
+ !(service_mask & (UINT64_C(1) << i))) {
+ s->active_on_lcore[lcore] = 0;
return -ENOEXEC;
+ }
+
+ s->active_on_lcore[lcore] = 1;
/* check do we need cmpset, if MT safe or <= 1 core
* mapped, atomic ops are not required.
@@ -374,6 +379,25 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
return 0;
}
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ struct rte_service_spec_impl *s = &rte_services[id];
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (!service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (s->active_on_lcore[ids[i]])
+ return 1;
+ }
+
+ return 0;
+}
+
int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_mt_unsafe)
{
@@ -398,7 +422,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
return -EBUSY;
}
- int ret = service_run(id, cs, UINT64_MAX);
+ int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
if (serialize_mt_unsafe)
rte_atomic32_dec(&s->num_mapped_cores);
@@ -419,9 +443,11 @@ rte_service_runner_func(void *arg)
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
/* return value ignored as no change to code flow */
- service_run(i, cs, service_mask);
+ service_run(i, lcore, cs, service_mask);
}
+ cs->loops++;
+
rte_smp_rmb();
}
@@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
}
}
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static void
rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
uint64_t all_cycles, uint32_t reset)
@@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id)
return 0;
}
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
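
Taken together, rte_service_may_be_active() and the new lcore attributes let an application tell when a stopped service has actually drained. A usage sketch, assuming EAL is initialized, sid is a registered service that was already stopped via rte_service_runstate_set(), and lcore is one of its service cores:

#include <stdio.h>
#include <inttypes.h>
#include <rte_cycles.h>
#include <rte_service.h>

static void
wait_for_service_quiesce(uint32_t sid, uint32_t lcore)
{
	uint64_t loops;
	int iters = 1000;

	/* may_be_active() returns 1 while a service lcore may still be
	 * inside the service callback, 0 once it is certainly idle */
	while (rte_service_may_be_active(sid) == 1 && iters-- > 0)
		rte_delay_us(10);

	if (rte_service_lcore_attr_get(lcore, RTE_SERVICE_LCORE_ATTR_LOOPS,
			&loops) == 0)
		printf("service lcore %u made %" PRIu64 " loops\n",
				lcore, loops);

	/* zero the per-lcore counters for the next measurement window */
	rte_service_lcore_attr_reset_all(lcore);
}
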
static void
service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
{
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 3719ec9d..fd92c75c 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 7
+LIBABIVER := 8
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -24,6 +24,7 @@ LDLIBS += -ldl
LDLIBS += -lpthread
LDLIBS += -lgcc_s
LDLIBS += -lrt
+LDLIBS += -lrte_kvargs
ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
LDLIBS += -lnuma
endif
@@ -60,12 +61,14 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b869..e59ac657 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -155,22 +155,12 @@ eal_get_runtime_dir(void)
}
/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
+const char *
rte_eal_mbuf_user_pool_ops(void)
{
return internal_config.user_mbuf_pool_ops_name;
}
-/* Return mbuf pool ops name */
-const char *
-rte_eal_mbuf_default_mempool_ops(void)
-{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
-
- return internal_config.user_mbuf_pool_ops_name;
-}
-
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -344,12 +334,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
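
The detection scheme itself is plain POSIX advisory locking: the primary holds a write lock on the shared config file, so a process that can open the file but cannot take the lock must be a secondary. A standalone sketch, using a stand-in path for the runtime config file:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	const char *path = "/tmp/demo_config"; /* stand-in for the config path */
	struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,	/* zero length locks the whole file */
	};
	int fd = open(path, O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;
	/* keep fd open afterwards, as the patch does: the lock must be
	 * held for the lifetime of the process */
	if (fcntl(fd, F_SETLK, &wr_lock) < 0)
		printf("secondary (config already write-locked)\n");
	else
		printf("primary (took the write lock)\n");
	return 0;
}
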
@@ -405,6 +400,7 @@ eal_usage(const char *prgname)
eal_common_usage();
printf("EAL Linux options:\n"
" --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n"
" --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
" --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
@@ -434,46 +430,45 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func )
}
static int
-eal_parse_socket_mem(char *socket_mem)
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
char * arg[RTE_MAX_NUMA_NODES];
char *end;
int arg_num, i, len;
uint64_t total_mem = 0;
- len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+ len = strnlen(strval, SOCKET_MEM_STRLEN);
if (len == SOCKET_MEM_STRLEN) {
RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
return -1;
}
/* all other error cases will be caught later */
- if (!isdigit(socket_mem[len-1]))
+ if (!isdigit(strval[len-1]))
return -1;
/* split the optarg into separate socket values */
- arg_num = rte_strsplit(socket_mem, len,
+ arg_num = rte_strsplit(strval, len,
arg, RTE_MAX_NUMA_NODES, ',');
/* if split failed, or 0 arguments */
if (arg_num <= 0)
return -1;
- internal_config.force_sockets = 1;
-
/* parse each defined socket option */
errno = 0;
for (i = 0; i < arg_num; i++) {
+ uint64_t val;
end = NULL;
- internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+ val = strtoull(arg[i], &end, 10);
/* check for invalid input */
if ((errno != 0) ||
(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
return -1;
- internal_config.socket_mem[i] *= 1024ULL;
- internal_config.socket_mem[i] *= 1024ULL;
- total_mem += internal_config.socket_mem[i];
+ val <<= 20;
+ total_mem += val;
+ socket_arg[i] = val;
}
/* check if we have a positive amount of total memory */
@@ -621,13 +616,27 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_SOCKET_MEM_NUM:
- if (eal_parse_socket_mem(optarg) < 0) {
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_mem) < 0) {
RTE_LOG(ERR, EAL, "invalid parameters for --"
OPT_SOCKET_MEM "\n");
eal_usage(prgname);
ret = -1;
goto out;
}
+ internal_config.force_sockets = 1;
+ break;
+
+ case OPT_SOCKET_LIMIT_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_limit) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_LIMIT "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_socket_limits = 1;
break;
case OPT_BASE_VIRTADDR_NUM:
@@ -678,6 +687,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -817,13 +834,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- /* create runtime data directory */
- if (eal_create_runtime_dir() < 0) {
- rte_eal_init_alert("Cannot create runtime directory\n");
- rte_errno = EACCES;
- return -1;
- }
-
if (eal_plugins_init() < 0) {
rte_eal_init_alert("Cannot init plugins\n");
rte_errno = EINVAL;
@@ -839,6 +849,11 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
/* Put mp channel init before bus scan so that we can init the vdev
* bus through mp channel in the secondary process before the bus scan.
*/
@@ -968,11 +983,6 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, (int)thread_id, cpuset,
ret == 0 ? "" : "...");
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -1044,9 +1054,26 @@ rte_eal_init(int argc, char **argv)
return fctret;
}
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg __rte_unused)
+{
+ /* ms is const, so find this memseg */
+ struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+
+ found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+ return 0;
+}
+
int __rte_experimental
rte_eal_cleanup(void)
{
+ /* if we're in a primary process, we need to mark hugepages as freeable
+ * so that finalization can release them back to the system.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
return 0;
}
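
With hugepages now marked freeable here, memory is only returned to the system if the application actually calls rte_eal_cleanup() on shutdown. Minimal sketch:

#include <rte_eal.h>

int main(int argc, char **argv)
{
	int ret = rte_eal_init(argc, argv);

	if (ret < 0)
		return 1;

	/* ... application work ... */

	/* release EAL resources; in a primary process this also marks
	 * hugepages freeable so they go back to the system */
	rte_eal_cleanup();
	return 0;
}
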
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index c115e823..391d2a65 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -19,7 +19,6 @@
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_errno.h>
-#include <rte_malloc.h>
#include <rte_spinlock.h>
#include <eal_private.h>
@@ -91,7 +90,7 @@ eal_alarm_callback(void *arg __rte_unused)
rte_spinlock_lock(&alarm_list_lk);
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
}
if (!LIST_EMPTY(&alarm_list)) {
@@ -122,7 +121,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
return -EINVAL;
- new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
+ new_alarm = calloc(1, sizeof(*new_alarm));
if (new_alarm == NULL)
return -ENOMEM;
@@ -196,7 +195,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
} else {
/* If calling from other context, mark that alarm is executing
@@ -220,7 +219,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
ap = ap_prev;
} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
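
Switching the alarm list to plain libc allocation removes its dependency on the rte heap, which matters now that the interrupt thread starts before memory init (see the eal.c hunk above). For reference, a small sketch of the alarm API these changes serve; the (void *)-1 sentinel matching any cb_arg is taken from the unregister logic above:

#include <stdio.h>
#include <rte_alarm.h>

static void
alarm_cb(void *arg)
{
	printf("alarm fired, arg=%p\n", arg);
}

static void
demo(void)
{
	/* arm a one-shot alarm 100 ms from now */
	if (rte_eal_alarm_set(100 * 1000 /* us */, alarm_cb, NULL) < 0)
		printf("failed to set alarm\n");

	/* ... */

	/* cancel every pending instance of alarm_cb, whatever its arg */
	rte_eal_alarm_cancel(alarm_cb, (void *)-1);
}
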
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7eca711b..3a7d4b22 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -18,6 +18,8 @@
#include <sys/queue.h>
#include <sys/stat.h>
+#include <linux/mman.h> /* for hugetlb-related flags */
+
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
@@ -313,11 +315,49 @@ compare_hpi(const void *a, const void *b)
return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
}
+static void
+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
+{
+ uint64_t total_pages = 0;
+ unsigned int i;
+
+ /*
+ * first, try to put all hugepages into relevant sockets, but
+ * if the first attempt fails, fall back to collecting all pages
+ * in one socket and sorting them later
+ */
+ total_pages = 0;
+ /* we also don't want to do this for legacy init */
+ if (!internal_config.legacy_mem)
+ for (i = 0; i < rte_socket_count(); i++) {
+ int socket = rte_socket_id_by_idx(i);
+ unsigned int num_pages =
+ get_num_hugepages_on_node(
+ dirent->d_name, socket);
+ hpi->num_pages[socket] = num_pages;
+ total_pages += num_pages;
+ }
+ /*
+ * we failed to sort memory from the get go, so fall
+ * back to old way
+ */
+ if (total_pages == 0) {
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit number of hugepages to
+ * 1GB per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+ }
+}
+
static int
hugepage_info_init(void)
{ const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
- unsigned int i, total_pages, num_sizes = 0;
+ unsigned int i, num_sizes = 0;
DIR *dir;
struct dirent *dirent;
@@ -355,6 +395,22 @@ hugepage_info_init(void)
"%" PRIu64 " reserved, but no mounted "
"hugetlbfs found for that size\n",
num_pages, hpi->hugepage_sz);
+ /* if we have kernel support for reserving hugepages
+ * through mmap, and we're in in-memory mode, treat this
+ * page size as valid. we cannot be in legacy mode at
+ * this point because we've checked this earlier in the
+ * init process.
+ */
+#ifdef MAP_HUGE_SHIFT
+ if (internal_config.in_memory) {
+ RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
+ "hugepages of size %" PRIu64 " bytes "
+ "will be allocated anonymously\n",
+ hpi->hugepage_sz);
+ calc_num_pages(hpi, dirent);
+ num_sizes++;
+ }
+#endif
continue;
}
@@ -371,35 +427,7 @@ hugepage_info_init(void)
if (clear_hugedir(hpi->hugedir) == -1)
break;
- /*
- * first, try to put all hugepages into relevant sockets, but
- * if first attempts fails, fall back to collecting all pages
- * in one socket and sorting them later
- */
- total_pages = 0;
- /* we also don't want to do this for legacy init */
- if (!internal_config.legacy_mem)
- for (i = 0; i < rte_socket_count(); i++) {
- int socket = rte_socket_id_by_idx(i);
- unsigned int num_pages =
- get_num_hugepages_on_node(
- dirent->d_name, socket);
- hpi->num_pages[socket] = num_pages;
- total_pages += num_pages;
- }
- /*
- * we failed to sort memory from the get go, so fall
- * back to old way
- */
- if (total_pages == 0)
- hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
-
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, limit number of hugepages to
- * 1GB per page size */
- hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
- RTE_PGSIZE_1G / hpi->hugepage_sz);
-#endif
+ calc_num_pages(hpi, dirent);
num_sizes++;
}
@@ -423,8 +451,7 @@ hugepage_info_init(void)
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
num_pages += hpi->num_pages[j];
- if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0 &&
- num_pages > 0)
+ if (num_pages > 0)
return 0;
}
@@ -446,6 +473,10 @@ eal_hugepage_info_init(void)
if (hugepage_info_init() < 0)
return -1;
+ /* for no shared files mode, we're done */
+ if (internal_config.no_shconf)
+ return 0;
+
hpi = &internal_config.hugepage_info[0];
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
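
get_num_hugepages() and get_num_hugepages_on_node() are not shown in this hunk; they read the kernel's hugetlb counters from sysfs. A rough standalone sketch of that idea -- the file names below are assumptions based on the kernel's hugetlb ABI, not copied from the DPDK helpers:

#include <stdio.h>

/* "dirent_name" is a per-size directory such as "hugepages-2048kB" */
static unsigned long
read_sysfs_counter(const char *dirent_name, const char *file)
{
	char path[256];
	unsigned long val = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/mm/hugepages/%s/%s",
			dirent_name, file);
	f = fopen(path, "r");
	if (f == NULL)
		return 0;
	if (fscanf(f, "%lu", &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(void)
{
	unsigned long free_pg = read_sysfs_counter("hugepages-2048kB",
			"free_hugepages");
	unsigned long resv_pg = read_sysfs_counter("hugepages-2048kB",
			"resv_hugepages");

	/* reserved pages are already promised to someone else */
	printf("2M pages usable: %lu\n",
			free_pg > resv_pg ? free_pg - resv_pg : 0);
	return 0;
}
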
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 056d41c1..4076c6d6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -30,7 +30,6 @@
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
-#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
@@ -405,8 +404,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
}
/* allocate a new interrupt callback entity */
- callback = rte_zmalloc("interrupt callback list",
- sizeof(*callback), 0);
+ callback = calloc(1, sizeof(*callback));
if (callback == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
return -ENOMEM;
@@ -420,7 +418,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
TAILQ_FOREACH(src, &intr_sources, next) {
if (src->intr_handle.fd == intr_handle->fd) {
/* we had no interrupts for this */
- if TAILQ_EMPTY(&src->callbacks)
+ if (TAILQ_EMPTY(&src->callbacks))
wake_thread = 1;
TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
@@ -431,10 +429,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
/* no existing callbacks for this - add new source */
if (src == NULL) {
- if ((src = rte_zmalloc("interrupt source list",
- sizeof(*src), 0)) == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
- rte_free(callback);
+ free(callback);
ret = -ENOMEM;
} else {
src->intr_handle = *intr_handle;
@@ -501,7 +499,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
cb->cb_arg == cb_arg)) {
TAILQ_REMOVE(&src->callbacks, cb, next);
- rte_free(cb);
+ free(cb);
ret++;
}
}
@@ -509,7 +507,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
/* all callbacks for that source are removed. */
if (TAILQ_EMPTY(&src->callbacks)) {
TAILQ_REMOVE(&intr_sources, src, next);
- rte_free(src);
+ free(src);
}
}
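
The "if TAILQ_EMPTY(&src->callbacks)" fixed above only ever compiled because the macro expands to a fully parenthesized expression. A tiny demonstration with a hypothetical macro:

#include <stdio.h>

#define DEMO_EMPTY(p) ((p) == NULL)

int main(void)
{
	const char *p = NULL;

	/* legal only because the expansion is if ((p) == NULL): the macro's
	 * own outer parentheses double as the if-condition parentheses */
	if DEMO_EMPTY(p)
		printf("empty\n");

	/* the conventional form, as in the fixed line of the patch */
	if (DEMO_EMPTY(p))
		printf("still empty\n");

	/* but "if DEMO_EMPTY(p) && 1" would not compile: the if consumes
	 * only the macro's parentheses, leaving "&& 1;" as a statement */
	return 0;
}
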
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index ff145884..9d02dddb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -25,25 +25,14 @@
static ssize_t
console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
{
- char copybuf[BUFSIZ + 1];
ssize_t ret;
- uint32_t loglevel;
/* write on stdout */
ret = fwrite(buf, 1, size, stdout);
fflush(stdout);
- /* truncate message if too big (should not happen) */
- if (size > BUFSIZ)
- size = BUFSIZ;
-
/* Syslog error levels are from 0 to 7, so subtract 1 to convert */
- loglevel = rte_log_cur_msg_loglevel() - 1;
- memcpy(copybuf, buf, size);
- copybuf[size] = '\0';
-
- /* write on syslog too */
- syslog(loglevel, "%s", copybuf);
+ syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf);
return ret;
}
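
The rewrite drops the bounce buffer by using the printf precision specifier, which bounds how many bytes %s may read even when the input is not NUL-terminated. Standalone illustration:

#include <stdio.h>

int main(void)
{
	/* no NUL terminator within the five bytes we care about */
	const char buf[] = { 'h', 'e', 'l', 'l', 'o', 'X', 'Y', 'Z' };

	/* "%.*s" takes the maximum byte count as an int argument, so no
	 * copy into a NUL-terminated scratch buffer is needed */
	printf("%.*s\n", 5, buf);	/* prints "hello" */
	return 0;
}
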
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 8c11f98c..aa95551a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -28,6 +28,7 @@
#include <numaif.h>
#endif
#include <linux/falloc.h>
+#include <linux/mman.h> /* for hugetlb-related mmap flags */
#include <rte_common.h>
#include <rte_log.h>
@@ -39,6 +40,16 @@
#include "eal_filesystem.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
+#include "eal_private.h"
+
+const int anonymous_hugepages_supported =
+#ifdef MAP_HUGE_SHIFT
+ 1;
+#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT
+#else
+ 0;
+#define RTE_MAP_HUGE_SHIFT 26
+#endif
/*
* not all kernel version support fallocate on hugetlbfs, so fall back to
@@ -171,32 +182,6 @@ get_file_size(int fd)
return st.st_size;
}
-/* we cannot use rte_memseg_list_walk() here because we will be holding a
- * write lock whenever we enter every function in this file, however copying
- * the same iteration code everywhere is not ideal as well. so, use a lockless
- * copy of memseg list walk here.
- */
-static int
-memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ret = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
- struct rte_memseg_list *msl = &mcfg->memsegs[i];
-
- if (msl->base_va == NULL)
- continue;
-
- ret = func(msl, arg);
- if (ret < 0)
- return -1;
- if (ret > 0)
- return 1;
- }
- return 0;
-}
-
/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */
static int lock(int fd, int type)
{
@@ -486,45 +471,84 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
int cur_socket_id = 0;
#endif
uint64_t map_offset;
+ rte_iova_t iova;
+ void *va;
char path[PATH_MAX];
int ret = 0;
int fd;
size_t alloc_sz;
-
- /* takes out a read lock on segment or segment list */
- fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
- return -1;
- }
+ int flags;
+ void *new_addr;
alloc_sz = hi->hugepage_sz;
- if (internal_config.single_file_segments) {
- map_offset = seg_idx * alloc_sz;
- ret = resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
- alloc_sz, true);
- if (ret < 0)
- goto resized;
- } else {
+ if (!internal_config.single_file_segments &&
+ internal_config.in_memory &&
+ anonymous_hugepages_supported) {
+ int log2, flags;
+
+ log2 = rte_log2_u32(alloc_sz);
+ /* as per mmap() manpage, all page sizes are log2 of page size
+ * shifted by MAP_HUGE_SHIFT
+ */
+ flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ fd = -1;
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0);
+
+ /* the single-file segments codepath will never be active here,
+ * because in-memory mode is incompatible with it and is rejected
+ * at EAL initialization. however, the compiler doesn't know that
+ * and complains about map_offset being used uninitialized on the
+ * failure codepaths when in-memory mode is enabled, so assign a
+ * value here.
+ */
map_offset = 0;
- if (ftruncate(fd, alloc_sz) < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
- __func__, strerror(errno));
- goto resized;
+ } else {
+ /* takes out a read lock on segment or segment list */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
+ return -1;
}
- }
- /*
- * map the segment, and populate page tables, the kernel fills this
- * segment with zeros if it's a new page.
- */
- void *va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, map_offset);
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * alloc_sz;
+ ret = resize_hugefile(fd, path, list_idx, seg_idx,
+ map_offset, alloc_sz, true);
+ if (ret < 0)
+ goto resized;
+ } else {
+ map_offset = 0;
+ if (ftruncate(fd, alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ if (internal_config.hugepage_unlink) {
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ }
+ }
+
+ /*
+ * map the segment, and populate page tables, the kernel fills
+ * this segment with zeros if it's a new page.
+ */
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd,
+ map_offset);
+ }
if (va == MAP_FAILED) {
RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
strerror(errno));
- goto resized;
+ /* mmap failed, but the previous region might have been
+ * unmapped anyway. try to remap it
+ */
+ goto unmapped;
}
if (va != addr) {
RTE_LOG(DEBUG, EAL, "%s(): wrong mmap() address\n", __func__);
@@ -532,24 +556,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
goto resized;
}
- rte_iova_t iova = rte_mem_virt2iova(addr);
- if (iova == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
- __func__);
- goto mapped;
- }
-
-#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
- move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
-
- if (cur_socket_id != socket_id) {
- RTE_LOG(DEBUG, EAL,
- "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
- __func__, socket_id, cur_socket_id);
- goto mapped;
- }
-#endif
-
/* In linux, hugetlb limitations, like cgroup, are
* enforced at fault time instead of mmap(), even
* with the option of MAP_POPULATE. Kernel will send
@@ -562,9 +568,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
(unsigned int)(alloc_sz >> 20));
goto mapped;
}
- /* for non-single file segments, we can close fd here */
- if (!internal_config.single_file_segments)
- close(fd);
/* we need to trigger a write to the page to enforce page fault and
* ensure that page is accessible to us, but we can't overwrite value
@@ -573,6 +576,28 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
*/
*(volatile int *)addr = *(volatile int *)addr;
+ iova = rte_mem_virt2iova(addr);
+ if (iova == RTE_BAD_PHYS_ADDR) {
+ RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
+ __func__);
+ goto mapped;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
+
+ if (cur_socket_id != socket_id) {
+ RTE_LOG(DEBUG, EAL,
+ "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
+ __func__, socket_id, cur_socket_id);
+ goto mapped;
+ }
+#endif
+ /* for non-single file segments that aren't in-memory, we can close fd
+ * here */
+ if (!internal_config.single_file_segments && !internal_config.in_memory)
+ close(fd);
+
ms->addr = addr;
ms->hugepage_sz = alloc_sz;
ms->len = alloc_sz;
@@ -585,14 +610,32 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
mapped:
munmap(addr, alloc_sz);
+unmapped:
+ flags = MAP_FIXED;
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+ new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
+ if (new_addr != addr) {
+ if (new_addr != NULL)
+ munmap(new_addr, alloc_sz);
+ /* we're leaving a hole in our virtual address space. if
+ * somebody else maps this hole now, we could accidentally
+ * overwrite it in the future.
+ */
+ RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
+ }
resized:
+ /* in-memory mode will never be single-file-segments mode */
if (internal_config.single_file_segments) {
resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
alloc_sz, false);
/* ignore failure, can't make it any worse */
} else {
/* only remove file if we can take out a write lock */
- if (lock(fd, LOCK_EX) == 1)
+ if (internal_config.hugepage_unlink == 0 &&
+ internal_config.in_memory == 0 &&
+ lock(fd, LOCK_EX) == 1)
unlink(path);
close(fd);
}
@@ -617,6 +660,12 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
return -1;
}
+ /* if we've already unlinked the page, nothing needs to be done */
+ if (internal_config.hugepage_unlink) {
+ memset(ms, 0, sizeof(*ms));
+ return 0;
+ }
+
/* if we are not in single file segments mode, we're going to unmap the
* segment and thus drop the lock on original fd, but hugepage dir is
* now locked so we can take out another one without races.
@@ -695,7 +744,7 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
@@ -799,7 +848,7 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
@@ -878,7 +927,8 @@ eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
wa.socket = socket;
wa.segs_allocated = 0;
- ret = memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
+ /* memalloc is locked, so it's safe to use thread-unsafe version */
+ ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
if (ret == 0) {
RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
__func__);
@@ -943,7 +993,10 @@ eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
wa.ms = cur;
wa.hi = hi;
- walk_res = memseg_list_walk_thread_unsafe(free_seg_walk, &wa);
+ /* memalloc is locked, so it's safe to use thread-unsafe version
+ */
+ walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
+ &wa);
if (walk_res == 1)
continue;
if (walk_res == 0)
@@ -1230,7 +1283,8 @@ eal_memalloc_sync_with_primary(void)
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return 0;
- if (memseg_list_walk_thread_unsafe(sync_walk, NULL))
+ /* memalloc is locked, so it's safe to call thread-unsafe version */
+ if (rte_memseg_list_walk_thread_unsafe(sync_walk, NULL))
return -1;
return 0;
}
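
The anonymous-hugepage path boils down to a single mmap() call with the page size encoded in the flags. A standalone sketch for one 2 MB page; it needs reserved hugepages and a kernel exposing MAP_HUGE_SHIFT:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <linux/mman.h>	/* MAP_HUGE_SHIFT, as included by the patch */

int main(void)
{
	size_t sz = 2UL << 20;	/* one 2 MB hugepage */
	int log2_sz = 21;	/* log2(2 MB) */
	/* encode log2 of the page size in the flags, as alloc_seg() does */
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
			(log2_sz << MAP_HUGE_SHIFT);
	void *va = mmap(NULL, sz, PROT_READ | PROT_WRITE, flags, -1, 0);

	if (va == MAP_FAILED) {
		perror("mmap");	/* e.g. no free hugepages reserved */
		return 1;
	}
	/* touch the page so the hugetlb fault happens now */
	*(volatile int *)va = 1;
	munmap(va, sz);
	return 0;
}
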
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index c917de1c..dbf19499 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -66,7 +66,7 @@ static bool phys_addrs_available = true;
static void
test_phys_addrs_available(void)
{
- uint64_t tmp;
+ uint64_t tmp = 0;
phys_addr_t physaddr;
if (!rte_eal_has_hugepages()) {
@@ -521,7 +521,18 @@ static void *
create_shared_memory(const char *filename, const size_t mem_size)
{
void *retval;
- int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ int fd;
+
+ /* if no shared files mode is used, create anonymous memory instead */
+ if (internal_config.no_shconf) {
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (retval == MAP_FAILED)
+ return NULL;
+ return retval;
+ }
+
+ fd = open(filename, O_CREAT | O_RDWR, 0666);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
@@ -767,6 +778,34 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
return 0;
}
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+static int
+free_memseg_list(struct rte_memseg_list *msl)
+{
+ if (rte_fbarray_destroy(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
+ return -1;
+ }
+ memset(msl, 0, sizeof(*msl));
+ return 0;
+}
+
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
@@ -1049,8 +1088,7 @@ get_socket_mem_size(int socket)
for (i = 0; i < internal_config.num_hugepage_sizes; i++){
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0)
- size += hpi->hugepage_sz * hpi->num_pages[socket];
+ size += hpi->hugepage_sz * hpi->num_pages[socket];
}
return size;
@@ -1605,6 +1643,15 @@ hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
}
static int
+limits_callback(int socket_id, size_t cur_limit, size_t new_len)
+{
+ RTE_SET_USED(socket_id);
+ RTE_SET_USED(cur_limit);
+ RTE_SET_USED(new_len);
+ return -1;
+}
+
+static int
eal_hugepage_init(void)
{
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
@@ -1687,6 +1734,18 @@ eal_hugepage_init(void)
free(pages);
}
}
+ /* if socket limits were specified, set them */
+ if (internal_config.force_socket_limits) {
+ unsigned int i;
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ uint64_t limit = internal_config.socket_limit[i];
+ if (limit == 0)
+ continue;
+ if (rte_mem_alloc_validator_register("socket-limit",
+ limits_callback, i, limit))
+ RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
+ }
+ }
return 0;
}
@@ -1840,3 +1899,316 @@ rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;
}
+
+static int __rte_unused
+memseg_primary_init_32(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int active_sockets, hpi_idx, msl_idx = 0;
+ unsigned int socket_id, i;
+ struct rte_memseg_list *msl;
+ uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
+ uint64_t max_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* this is a giant hack, but desperate times call for desperate
+ * measures. in legacy 32-bit mode, we cannot preallocate VA space,
+ * because having upwards of 2 gigabytes of VA space already mapped will
+ * interfere with our ability to map and sort hugepages.
+ *
+ * therefore, in legacy 32-bit mode, we will be initializing memseg
+ * lists much later - in eal_memory.c, right after we unmap all the
+ * unneeded pages. this will not affect secondary processes, as those
+ * should be able to mmap the space without (too many) problems.
+ */
+ if (internal_config.legacy_mem)
+ return 0;
+
+ /* 32-bit mode is a very special case. we cannot know in advance where
+ * the user will want to allocate their memory, so we have to do some
+ * heuristics.
+ */
+ active_sockets = 0;
+ total_requested_mem = 0;
+ if (internal_config.force_sockets)
+ for (i = 0; i < rte_socket_count(); i++) {
+ uint64_t mem;
+
+ socket_id = rte_socket_id_by_idx(i);
+ mem = internal_config.socket_mem[socket_id];
+
+ if (mem == 0)
+ continue;
+
+ active_sockets++;
+ total_requested_mem += mem;
+ }
+ else
+ total_requested_mem = internal_config.memory;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ if (total_requested_mem > max_mem) {
+ RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
+ (unsigned int)(max_mem >> 20));
+ return -1;
+ }
+ total_extra_mem = max_mem - total_requested_mem;
+ extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
+ total_extra_mem / active_sockets;
+
+ /* the allocation logic is a little bit convoluted, but here's how it
+ * works, in a nutshell:
+ * - if user hasn't specified on which sockets to allocate memory via
+ * --socket-mem, we allocate all of our memory on master core socket.
+ * - if user has specified sockets to allocate memory on, there may be
+ * some "unused" memory left (e.g. if user has specified --socket-mem
+ * such that not all memory adds up to 2 gigabytes), so add it to all
+ * sockets that are in use equally.
+ *
+ * page sizes are sorted by size in descending order, so we can safely
+ * assume that we dispense with bigger page sizes first.
+ */
+
+ /* create memseg lists */
+ for (i = 0; i < rte_socket_count(); i++) {
+ int hp_sizes = (int) internal_config.num_hugepage_sizes;
+ uint64_t max_socket_mem, cur_socket_mem;
+ unsigned int master_lcore_socket;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ bool skip;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ /* if we didn't specifically request memory on this socket */
+ skip = active_sockets != 0 &&
+ internal_config.socket_mem[socket_id] == 0;
+ /* ...or if we didn't specifically request memory on *any*
+ * socket, and this is not master lcore
+ */
+ master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
+ skip |= active_sockets == 0 && socket_id != master_lcore_socket;
+
+ if (skip) {
+ RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
+ socket_id);
+ continue;
+ }
+
+ /* max amount of memory on this socket */
+ max_socket_mem = (active_sockets != 0 ?
+ internal_config.socket_mem[socket_id] :
+ internal_config.memory) +
+ extra_mem_per_socket;
+ cur_socket_mem = 0;
+
+ for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
+ uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
+ uint64_t hugepage_sz;
+ struct hugepage_info *hpi;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* check if pages are actually available */
+ if (hpi->num_pages[socket_id] == 0)
+ continue;
+
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+ max_pagesz_mem = max_socket_mem - cur_socket_mem;
+
+ /* make it multiple of page size */
+ max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
+ hugepage_sz);
+
+ RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
+ "%" PRIu64 "M on socket %i\n",
+ max_pagesz_mem >> 20, socket_id);
+
+ type_msl_idx = 0;
+ while (cur_pagesz_mem < max_pagesz_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ max_pagesz_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx)) {
+ /* failing to allocate a memseg list is
+ * a serious error.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ if (alloc_va_space(msl)) {
+ /* if we couldn't allocate VA space, we
+ * can try with smaller page sizes.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
+ /* deallocate memseg list */
+ if (free_memseg_list(msl))
+ return -1;
+ break;
+ }
+
+ total_segs += msl->memseg_arr.len;
+ cur_pagesz_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+ msl_idx++;
+ }
+ cur_socket_mem += cur_pagesz_mem;
+ }
+ if (cur_socket_mem == 0) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
+ socket_id);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __rte_unused
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, socket_id, hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ if (total_mem >= max_mem)
+ break;
+
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+#ifndef RTE_ARCH_64
+ memseg_primary_init_32() :
+#else
+ memseg_primary_init() :
+#endif
+ memseg_secondary_init();
+}
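
get_mem_amount() clamps each memseg list twice: by segment count and by total bytes. With the stock config values (RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768 -- assumed defaults, check your build), a 2 MB-page list tops out at 16 GB and a 1 GB-page list at 32 GB. A standalone sketch of the arithmetic:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* assumed defaults; the real values come from the DPDK build config */
#define MAX_MEMSEG_PER_LIST	8192
#define MAX_MEM_MB_PER_LIST	32768

/* mirror of get_mem_amount(): clamp by page count and by per-list bytes,
 * then round the area up to a page-size multiple */
static uint64_t
mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t cap = (uint64_t)MAX_MEM_MB_PER_LIST << 20;
	uint64_t area_sz;

	if (max_mem > cap)
		max_mem = cap;
	area_sz = page_sz * MAX_MEMSEG_PER_LIST;
	if (area_sz > max_mem)
		area_sz = max_mem;
	if (area_sz < page_sz)
		area_sz = page_sz;
	return ((area_sz + page_sz - 1) / page_sz) * page_sz;
}

int main(void)
{
	printf("2M pages: %" PRIu64 " MB per list\n",
			mem_amount(2ULL << 20, UINT64_MAX) >> 20); /* 16384 */
	printf("1G pages: %" PRIu64 " MB per list\n",
			mem_amount(1ULL << 30, UINT64_MAX) >> 20); /* 32768 */
	return 0;
}
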
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index f652ff98..b496fc71 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -176,7 +176,7 @@ int rte_sys_gettid(void)
int rte_thread_setname(pthread_t id, const char *name)
{
- int ret = -1;
+ int ret = ENOSYS;
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 12)
ret = pthread_setname_np(id, name);
@@ -184,5 +184,5 @@ int rte_thread_setname(pthread_t id, const char *name)
#endif
RTE_SET_USED(id);
RTE_SET_USED(name);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index a2bbdfbf..c68dc38e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -87,42 +87,6 @@ static const struct vfio_iommu_type iommu_types[] = {
},
};
-/* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use
- * rte_memseg_walk() because by the time we enter callback we will be holding a
- * write lock, so regular rte-memseg_walk will deadlock. copying the same
- * iteration code everywhere is not ideal as well. so, use a lockless copy of
- * memseg walk here.
- */
-static int
-memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ms_idx, ret = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
- struct rte_memseg_list *msl = &mcfg->memsegs[i];
- const struct rte_memseg *ms;
- struct rte_fbarray *arr;
-
- if (msl->memseg_arr.count == 0)
- continue;
-
- arr = &msl->memseg_arr;
-
- ms_idx = rte_fbarray_find_next_used(arr, 0);
- while (ms_idx >= 0) {
- ms = rte_fbarray_get(arr, ms_idx);
- ret = func(msl, ms, arg);
- if (ret < 0)
- return -1;
- if (ret > 0)
- return 1;
- ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
- }
- }
- return 0;
-}
-
static int
is_null_map(const struct user_mem_map *map)
{
@@ -575,10 +539,6 @@ int
rte_vfio_clear_group(int vfio_group_fd)
{
int i;
- struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
- struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
- struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
struct vfio_config *vfio_cfg;
vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
@@ -587,40 +547,15 @@ rte_vfio_clear_group(int vfio_group_fd)
return -1;
}
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
-
- i = get_vfio_group_idx(vfio_group_fd);
- if (i < 0)
- return -1;
- vfio_cfg->vfio_groups[i].group_num = -1;
- vfio_cfg->vfio_groups[i].fd = -1;
- vfio_cfg->vfio_groups[i].devices = 0;
- vfio_cfg->vfio_active_groups--;
- return 0;
- }
-
- p->req = SOCKET_CLR_GROUP;
- p->group_num = vfio_group_fd;
- strcpy(mp_req.name, EAL_VFIO_MP);
- mp_req.len_param = sizeof(*p);
- mp_req.num_fds = 0;
-
- if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
- mp_reply.nb_received == 1) {
- mp_rep = &mp_reply.msgs[0];
- p = (struct vfio_mp_param *)mp_rep->param;
- if (p->result == SOCKET_OK) {
- free(mp_reply.msgs);
- return 0;
- } else if (p->result == SOCKET_NO_FD)
- RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
- else
- RTE_LOG(ERR, EAL, " no such VFIO group fd!\n");
-
- free(mp_reply.msgs);
- }
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
+ return -1;
+ vfio_cfg->vfio_groups[i].group_num = -1;
+ vfio_cfg->vfio_groups[i].fd = -1;
+ vfio_cfg->vfio_groups[i].devices = 0;
+ vfio_cfg->vfio_active_groups--;
- return -1;
+ return 0;
}
int
@@ -1357,7 +1292,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
/* check if window size needs to be adjusted */
memset(&param, 0, sizeof(param));
- if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+ /* we're inside a callback so use thread-unsafe version */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
&param) < 0) {
RTE_LOG(ERR, EAL, "Could not get window size\n");
ret = -1;
@@ -1386,7 +1322,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
ret = -1;
goto out;
}
- if (memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+ /* we're inside a callback, so use thread-unsafe version
+ */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
&vfio_container_fd) < 0) {
RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
ret = -1;
@@ -1624,7 +1562,7 @@ out:
return ret;
}
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
{
if (len == 0) {
@@ -1635,7 +1573,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
return container_dma_map(default_vfio_cfg, vaddr, iova, len);
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
{
if (len == 0) {
@@ -1678,7 +1616,7 @@ rte_vfio_noiommu_is_enabled(void)
return c == 'Y';
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
int i;
@@ -1728,7 +1666,7 @@ rte_vfio_container_destroy(int container_fd)
return 0;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
{
struct vfio_config *vfio_cfg;
@@ -1774,11 +1712,11 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
return vfio_group_fd;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
{
struct vfio_config *vfio_cfg;
- struct vfio_group *cur_grp;
+ struct vfio_group *cur_grp = NULL;
int i;
vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
@@ -1795,7 +1733,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
}
/* This should not happen */
- if (i == VFIO_MAX_GROUPS) {
+ if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
RTE_LOG(ERR, EAL, "Specified group number not found\n");
return -1;
}
@@ -1813,7 +1751,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
return 0;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
uint64_t len)
{
@@ -1833,7 +1771,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
return container_dma_map(vfio_cfg, vaddr, iova, len);
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
uint64_t len)
{
@@ -1855,14 +1793,14 @@ rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
#else
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
__rte_unused uint64_t len)
{
@@ -1909,7 +1847,7 @@ rte_vfio_clear_group(__rte_unused int vfio_group_fd)
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *iommu_group_num)
@@ -1917,45 +1855,45 @@ rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_container_fd(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
@@ -1964,7 +1902,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index e65b1037..68d4750a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -129,7 +129,6 @@ int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
-#define SOCKET_CLR_GROUP 0x300
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 9c202bb0..680a24aa 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -55,14 +55,6 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
reply.fds[0] = fd;
}
break;
- case SOCKET_CLR_GROUP:
- r->req = SOCKET_CLR_GROUP;
- r->group_num = m->group_num;
- if (rte_vfio_clear_group(m->group_num) < 0)
- r->result = SOCKET_NO_FD;
- else
- r->result = SOCKET_OK;
- break;
case SOCKET_REQ_CONTAINER:
r->req = SOCKET_REQ_CONTAINER;
fd = rte_vfio_get_container_fd();
diff --git a/lib/librte_eal/linuxapp/eal/meson.build b/lib/librte_eal/linuxapp/eal/meson.build
index cce37712..6e31c2aa 100644
--- a/lib/librte_eal/linuxapp/eal/meson.build
+++ b/lib/librte_eal/linuxapp/eal/meson.build
@@ -23,6 +23,7 @@ env_sources = files('eal_alarm.c',
'eal_dev.c',
)
+deps += ['kvargs']
if has_libnuma == 1
dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
endif
diff --git a/lib/librte_eal/meson.build b/lib/librte_eal/meson.build
index 4aa63e3d..e1fde15d 100644
--- a/lib/librte_eal/meson.build
+++ b/lib/librte_eal/meson.build
@@ -18,12 +18,13 @@ elif host_machine.system() == 'freebsd'
subdir('bsdapp/eal')
else
- error('unsupported system type @0@'.format(hostmachine.system()))
+ error('unsupported system type "@0@"'.format(host_machine.system()))
endif
-version = 7 # the version of the EAL API
+version = 8 # the version of the EAL API
allow_experimental_apis = true
deps += 'compat'
+deps += 'kvargs'
cflags += '-D_GNU_SOURCE'
sources = common_sources + env_sources
objs = common_objs + env_objs
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f7dd0e7b..344a43d3 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -181,7 +181,6 @@ DPDK_17.11 {
rte_bus_get_iommu_class;
rte_eal_has_pci;
rte_eal_iova_mode;
- rte_eal_mbuf_default_mempool_ops;
rte_eal_using_phys_addrs;
rte_eal_vfio_intr_mode;
rte_lcore_has_role;
@@ -241,25 +240,53 @@ DPDK_18.05 {
} DPDK_18.02;
+DPDK_18.08 {
+ global:
+
+ rte_eal_mbuf_user_pool_ops;
+ rte_uuid_compare;
+ rte_uuid_is_null;
+ rte_uuid_parse;
+ rte_uuid_unparse;
+ rte_vfio_container_create;
+ rte_vfio_container_destroy;
+ rte_vfio_container_dma_map;
+ rte_vfio_container_dma_unmap;
+ rte_vfio_container_group_bind;
+ rte_vfio_container_group_unbind;
+ rte_vfio_dma_map;
+ rte_vfio_dma_unmap;
+ rte_vfio_get_container_fd;
+ rte_vfio_get_group_fd;
+ rte_vfio_get_group_num;
+
+} DPDK_18.05;
+
EXPERIMENTAL {
global:
+ rte_class_find;
+ rte_class_find_by_name;
+ rte_class_register;
+ rte_class_unregister;
rte_ctrl_thread_create;
rte_dev_event_callback_register;
rte_dev_event_callback_unregister;
rte_dev_event_monitor_start;
rte_dev_event_monitor_stop;
+ rte_dev_iterator_init;
+ rte_dev_iterator_next;
rte_devargs_add;
rte_devargs_dump;
rte_devargs_insert;
rte_devargs_next;
rte_devargs_parse;
+ rte_devargs_parsef;
rte_devargs_remove;
rte_devargs_type_count;
rte_eal_cleanup;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
- rte_eal_mbuf_user_pool_ops;
rte_fbarray_attach;
rte_fbarray_destroy;
rte_fbarray_detach;
@@ -269,8 +296,14 @@ EXPERIMENTAL {
rte_fbarray_find_next_used;
rte_fbarray_find_next_n_free;
rte_fbarray_find_next_n_used;
+ rte_fbarray_find_prev_free;
+ rte_fbarray_find_prev_used;
+ rte_fbarray_find_prev_n_free;
+ rte_fbarray_find_prev_n_used;
rte_fbarray_find_contig_free;
rte_fbarray_find_contig_used;
+ rte_fbarray_find_rev_contig_free;
+ rte_fbarray_find_rev_contig_used;
rte_fbarray_get;
rte_fbarray_init;
rte_fbarray_is_used;
@@ -286,25 +319,20 @@ EXPERIMENTAL {
rte_mem_virt2memseg;
rte_mem_virt2memseg_list;
rte_memseg_contig_walk;
+ rte_memseg_contig_walk_thread_unsafe;
rte_memseg_list_walk;
+ rte_memseg_list_walk_thread_unsafe;
rte_memseg_walk;
+ rte_memseg_walk_thread_unsafe;
rte_mp_action_register;
rte_mp_action_unregister;
rte_mp_reply;
rte_mp_request_sync;
rte_mp_request_async;
rte_mp_sendmsg;
+ rte_service_lcore_attr_get;
+ rte_service_lcore_attr_reset_all;
+ rte_service_may_be_active;
rte_socket_count;
rte_socket_id_by_idx;
- rte_vfio_dma_map;
- rte_vfio_dma_unmap;
- rte_vfio_get_container_fd;
- rte_vfio_get_group_fd;
- rte_vfio_get_group_num;
- rte_vfio_container_create;
- rte_vfio_container_destroy;
- rte_vfio_container_dma_map;
- rte_vfio_container_dma_unmap;
- rte_vfio_container_group_bind;
- rte_vfio_container_group_unbind;
};
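With the container API promoted into the stable DPDK_18.08 node above, the intended call sequence looks roughly as follows. This is a minimal sketch, not part of the patch: the IOMMU group number is a hypothetical example value, vaddr/iova/len are caller-supplied, and error handling is abbreviated.

	int container_fd = rte_vfio_container_create();

	if (container_fd < 0)
		return -1;
	/* group 42 is an assumed example value */
	if (rte_vfio_container_group_bind(container_fd, 42) < 0 ||
			rte_vfio_container_dma_map(container_fd, vaddr,
				iova, len) < 0) {
		rte_vfio_container_destroy(container_fd);
		return -1;
	}
	/* devices in group 42 may now DMA within [iova, iova + len) */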
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d8..0935a275 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -16,7 +16,7 @@ LDLIBS += -lrte_mbuf
EXPORT_MAP := rte_ethdev_version.map
-LIBABIVER := 9
+LIBABIVER := 10
SRCS-y += rte_ethdev.c
SRCS-y += rte_flow.c
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index aed5d226..596cd0f3 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -2,7 +2,7 @@
# Copyright(c) 2017 Intel Corporation
name = 'ethdev'
-version = 9
+version = 10
allow_experimental_apis = true
sources = files('ethdev_profile.c',
'rte_ethdev.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index cd4bfd3c..4c320250 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -42,10 +42,7 @@
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
-static int ethdev_logtype;
-
-#define ethdev_log(level, fmt, ...) \
- rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+int rte_eth_dev_logtype;
static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -129,6 +126,7 @@ static const struct {
RTE_RX_OFFLOAD_BIT2STR(SCATTER),
RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
RTE_RX_OFFLOAD_BIT2STR(SECURITY),
+ RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC),
};
#undef RTE_RX_OFFLOAD_BIT2STR
@@ -303,14 +301,16 @@ rte_eth_dev_allocate(const char *name)
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
if (_rte_eth_dev_allocated(name) != NULL) {
- ethdev_log(ERR, "Ethernet device with name %s already allocated",
- name);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethernet device with name %s already allocated\n",
+ name);
goto unlock;
}
port_id = rte_eth_dev_find_free_port();
if (port_id == RTE_MAX_ETHPORTS) {
- ethdev_log(ERR, "Reached maximum number of Ethernet ports");
+ RTE_ETHDEV_LOG(ERR,
+ "Reached maximum number of Ethernet ports\n");
goto unlock;
}
@@ -346,8 +346,8 @@ rte_eth_dev_attach_secondary(const char *name)
break;
}
if (i == RTE_MAX_ETHPORTS) {
- RTE_PMD_DEBUG_TRACE(
- "device %s is not driven by the primary process\n",
+ RTE_ETHDEV_LOG(ERR,
+ "Device %s is not driven by the primary process\n",
name);
} else {
eth_dev = eth_dev_get(i);
@@ -394,7 +394,8 @@ rte_eth_is_valid_owner_id(uint64_t owner_id)
{
if (owner_id == RTE_ETH_DEV_NO_OWNER ||
rte_eth_dev_shared_data->next_owner_id <= owner_id) {
- RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016"PRIX64".\n", owner_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n",
+ owner_id);
return 0;
}
return 1;
@@ -437,7 +438,8 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
int sret;
if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
- RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
return -ENODEV;
}
@@ -447,22 +449,22 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
port_owner = &rte_eth_devices[port_id].data->owner;
if (port_owner->id != old_owner_id) {
- RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned"
- " by %s_%016"PRIX64".\n", port_id,
- port_owner->name, port_owner->id);
+ RTE_ETHDEV_LOG(ERR,
+ "Cannot set owner to port %u already owned by %s_%016"PRIX64"\n",
+ port_id, port_owner->name, port_owner->id);
return -EPERM;
}
sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s",
new_owner->name);
if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN)
- RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u owner name was truncated\n",
+ port_id);
port_owner->id = new_owner->id;
- RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016"PRIX64".\n", port_id,
- new_owner->name, new_owner->id);
+ RTE_ETHDEV_LOG(DEBUG, "Port %u owner is %s_%016"PRIx64"\n",
+ port_id, new_owner->name, new_owner->id);
return 0;
}
@@ -514,8 +516,9 @@ rte_eth_dev_owner_delete(const uint64_t owner_id)
if (rte_eth_devices[port_id].data->owner.id == owner_id)
memset(&rte_eth_devices[port_id].data->owner, 0,
sizeof(struct rte_eth_dev_owner));
- RTE_PMD_DEBUG_TRACE("All port owners owned by %016"PRIX64
- " identifier have removed.\n", owner_id);
+ RTE_ETHDEV_LOG(ERR,
+ "All port owners owned by %016"PRIx64" identifier have removed\n",
+ owner_id);
}
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
@@ -532,7 +535,8 @@ rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner)
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
- RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
ret = -ENODEV;
} else {
rte_memcpy(owner, &ethdev->data->owner, sizeof(*owner));
@@ -596,7 +600,7 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
@@ -613,7 +617,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
uint32_t pid;
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
@@ -654,7 +658,7 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
}
/* parse devargs */
- if (rte_devargs_parse(&da, "%s", devargs))
+ if (rte_devargs_parse(&da, devargs))
goto err;
ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args);
@@ -663,7 +667,7 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
/* no point looking at the port count if no port exists */
if (!rte_eth_dev_count_total()) {
- ethdev_log(ERR, "No port found for device (%s)", da.name);
+ RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name);
ret = -1;
goto err;
}
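The parse call above also reflects this release's rte_devargs_parse() signature change: the devargs string is now taken verbatim, and printf-style construction moved to the new rte_devargs_parsef() added to the EXPERIMENTAL section of the EAL version map earlier in this patch. A sketch with hypothetical argument values:

	struct rte_devargs da;

	/* string used as-is, no format expansion */
	rte_devargs_parse(&da, "net_tap0,iface=test0");
	/* formatted variant for callers that previously passed "%s" */
	rte_devargs_parsef(&da, "%s,iface=%s", "net_tap0", "test0");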
@@ -698,8 +702,8 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
dev_flags = rte_eth_devices[port_id].data->dev_flags;
if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
- ethdev_log(ERR,
- "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %"PRIu16" is bonded, cannot detach\n", port_id);
return -ENOTSUP;
}
@@ -778,21 +782,22 @@ rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
dev = &rte_eth_devices[port_id];
if (!dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be started before start any queue\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
return -EINVAL;
}
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
rx_queue_id, port_id);
return 0;
}
@@ -811,15 +816,15 @@ rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
rx_queue_id, port_id);
return 0;
}
@@ -837,28 +842,27 @@ rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
dev = &rte_eth_devices[port_id];
if (!dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be started before start any queue\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
return -EINVAL;
}
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
tx_queue_id, port_id);
return 0;
}
- return eth_err(port_id, dev->dev_ops->tx_queue_start(dev,
- tx_queue_id));
-
+ return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id));
}
int
@@ -870,15 +874,15 @@ rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
tx_queue_id, port_id);
return 0;
}
@@ -970,41 +974,6 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex)
}
}
-/**
- * A conversion function from rxmode bitfield API.
- */
-static void
-rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode,
- uint64_t *rx_offloads)
-{
- uint64_t offloads = 0;
-
- if (rxmode->header_split == 1)
- offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT;
- if (rxmode->hw_ip_checksum == 1)
- offloads |= DEV_RX_OFFLOAD_CHECKSUM;
- if (rxmode->hw_vlan_filter == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
- if (rxmode->hw_vlan_strip == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- if (rxmode->hw_vlan_extend == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
- if (rxmode->jumbo_frame == 1)
- offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- if (rxmode->hw_strip_crc == 1)
- offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
- if (rxmode->enable_scatter == 1)
- offloads |= DEV_RX_OFFLOAD_SCATTER;
- if (rxmode->enable_lro == 1)
- offloads |= DEV_RX_OFFLOAD_TCP_LRO;
- if (rxmode->hw_timestamp == 1)
- offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
- if (rxmode->security == 1)
- offloads |= DEV_RX_OFFLOAD_SECURITY;
-
- *rx_offloads = offloads;
-}
-
const char * __rte_experimental
rte_eth_dev_rx_offload_name(uint64_t offload)
{
@@ -1071,33 +1040,26 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
}
if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of RX queues requested (%u) is greater than max supported(%d)\n",
nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of TX queues requested (%u) is greater than max supported(%d)\n",
nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow configuration\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be stopped to allow configuration\n",
+ port_id);
return -EBUSY;
}
- /*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
- */
- if (dev_conf->rxmode.ignore_offload_bitfield == 0)
- rte_eth_convert_rx_offload_bitfield(
- &dev_conf->rxmode, &local_conf.rxmode.offloads);
-
/* Copy the dev_conf parameter into the dev structure */
memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf));
@@ -1107,28 +1069,28 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* configured device.
*/
if (nb_rx_q > dev_info.max_rx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n",
- port_id, nb_rx_q, dev_info.max_rx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n",
+ port_id, nb_rx_q, dev_info.max_rx_queues);
return -EINVAL;
}
if (nb_tx_q > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n",
- port_id, nb_tx_q, dev_info.max_tx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n",
+ port_id, nb_tx_q, dev_info.max_tx_queues);
return -EINVAL;
}
/* Check that the device supports requested interrupts */
if ((dev_conf->intr_conf.lsc == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
- dev->device->driver->name);
- return -EINVAL;
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n",
+ dev->device->driver->name);
+ return -EINVAL;
}
if ((dev_conf->intr_conf.rmv == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n",
- dev->device->driver->name);
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n",
+ dev->device->driver->name);
return -EINVAL;
}
@@ -1137,19 +1099,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* length is supported by the configured device.
*/
if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
- if (dev_conf->rxmode.max_rx_pkt_len >
- dev_info.max_rx_pktlen) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " > max valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
- (unsigned)dev_info.max_rx_pktlen);
+ if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
+ dev_info.max_rx_pktlen);
return -EINVAL;
} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " < min valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
(unsigned)ETHER_MIN_LEN);
return -EINVAL;
}
@@ -1164,36 +1123,42 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
/* Any requested offloading must be within its device capabilities */
if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) !=
local_conf.rxmode.offloads) {
- ethdev_log(ERR, "ethdev port_id=%d requested Rx offloads "
- "0x%" PRIx64 " doesn't match Rx offloads "
- "capabilities 0x%" PRIx64 " in %s()\n",
- port_id,
- local_conf.rxmode.offloads,
- dev_info.rx_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.rxmode.offloads,
+ dev_info.rx_offload_capa,
+ __func__);
+ return -EINVAL;
}
if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) !=
local_conf.txmode.offloads) {
- ethdev_log(ERR, "ethdev port_id=%d requested Tx offloads "
- "0x%" PRIx64 " doesn't match Tx offloads "
- "capabilities 0x%" PRIx64 " in %s()\n",
- port_id,
- local_conf.txmode.offloads,
- dev_info.tx_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.txmode.offloads,
+ dev_info.tx_offload_capa,
+ __func__);
+ return -EINVAL;
+ }
+
+ if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) &&
+ (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
+ RTE_ETHDEV_LOG(ERR,
+ "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n",
+ port_id);
+ return -EINVAL;
}
/* Check that device supports requested rss hash functions. */
if ((dev_info.flow_type_rss_offloads |
dev_conf->rx_adv_conf.rss_conf.rss_hf) !=
dev_info.flow_type_rss_offloads) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: "
- "0x%"PRIx64", valid value: 0x%"PRIx64"\n",
- port_id,
- dev_conf->rx_adv_conf.rss_conf.rss_hf,
- dev_info.flow_type_rss_offloads);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
}
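Since an unsupported rss_hf is now a hard -EINVAL rather than a logged warning, an application can mask its request against the advertised capabilities before configuring. A sketch, assuming `conf` is the rte_eth_conf being prepared for this port:

	struct rte_eth_dev_info info;

	rte_eth_dev_info_get(port_id, &info);
	/* request IP hashing, but only the types the device supports */
	conf.rx_adv_conf.rss_conf.rss_hf =
		ETH_RSS_IP & info.flow_type_rss_offloads;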
/*
@@ -1201,23 +1166,25 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
*/
diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_rx_queue_config = %d\n",
+ port_id, diag);
return diag;
}
diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_tx_queue_config = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
return diag;
}
diag = (*dev->dev_ops->dev_configure)(dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u dev_configure = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
@@ -1226,8 +1193,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
/* Initialize Rx profiling if enabled at compilation time. */
diag = __rte_eth_profile_rx_init(port_id, dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
@@ -1240,8 +1207,7 @@ void
_rte_eth_dev_reset(struct rte_eth_dev *dev)
{
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow reset\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u must be stopped to allow reset\n",
dev->data->port_id);
return;
}
@@ -1320,8 +1286,8 @@ rte_eth_dev_start(uint16_t port_id)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
if (dev->data->dev_started != 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already started\n",
port_id);
return 0;
}
@@ -1352,8 +1318,8 @@ rte_eth_dev_stop(uint16_t port_id)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
if (dev->data->dev_started == 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already stopped\n",
port_id);
return;
}
@@ -1465,7 +1431,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
@@ -1479,23 +1445,20 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
*/
rte_eth_dev_info_get(port_id, &dev_info);
if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
- RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
- mp->name, (int) mp->private_data_size,
- (int) sizeof(struct rte_pktmbuf_pool_private));
+ RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n",
+ mp->name, (int)mp->private_data_size,
+ (int)sizeof(struct rte_pktmbuf_pool_private));
return -ENOSPC;
}
mbp_buf_size = rte_pktmbuf_data_room_size(mp);
if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
- RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
- "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
- "=%d)\n",
- mp->name,
- (int)mbp_buf_size,
- (int)(RTE_PKTMBUF_HEADROOM +
- dev_info.min_rx_bufsize),
- (int)RTE_PKTMBUF_HEADROOM,
- (int)dev_info.min_rx_bufsize);
+ RTE_ETHDEV_LOG(ERR,
+ "%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n",
+ mp->name, (int)mbp_buf_size,
+ (int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize),
+ (int)RTE_PKTMBUF_HEADROOM,
+ (int)dev_info.min_rx_bufsize);
return -EINVAL;
}
@@ -1511,10 +1474,9 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_rx_desc,
- dev_info.rx_desc_lim.nb_max,
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ nb_rx_desc, dev_info.rx_desc_lim.nb_max,
dev_info.rx_desc_lim.nb_min,
dev_info.rx_desc_lim.nb_align);
return -EINVAL;
@@ -1542,14 +1504,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
rx_conf = &dev_info.default_rxconf;
local_conf = *rx_conf;
- if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) {
- /**
- * Reflect port offloads to queue offloads in order for
- * offloads to not be discarded.
- */
- rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode,
- &local_conf.offloads);
- }
/*
* If an offloading has already been enabled in
@@ -1571,16 +1525,13 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
*/
if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
local_conf.offloads) {
- ethdev_log(ERR, "Ethdev port_id=%d rx_queue_id=%d, new "
- "added offloads 0x%" PRIx64 " must be "
- "within pre-queue offload capabilities 0x%"
- PRIx64 " in %s()\n",
- port_id,
- rx_queue_id,
- local_conf.offloads,
- dev_info.rx_queue_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
+ "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, rx_queue_id, local_conf.offloads,
+ dev_info.rx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
@@ -1594,55 +1545,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
return eth_err(port_id, ret);
}
-/**
- * Convert from tx offloads to txq_flags.
- */
-static void
-rte_eth_convert_tx_offload(const uint64_t tx_offloads, uint32_t *txq_flags)
-{
- uint32_t flags = 0;
-
- if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS))
- flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
- if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT))
- flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
- if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
- if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
- flags |= ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP;
-
- *txq_flags = flags;
-}
-
-/**
- * A conversion function from txq_flags API.
- */
-static void
-rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads)
-{
- uint64_t offloads = 0;
-
- if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL))
- offloads |= DEV_TX_OFFLOAD_VLAN_INSERT;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP))
- offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP))
- offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP))
- offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) &&
- (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP))
- offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
-
- *tx_offloads = offloads;
-}
-
int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
@@ -1657,7 +1559,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
@@ -1676,12 +1578,11 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_tx_desc,
- dev_info.tx_desc_lim.nb_max,
- dev_info.tx_desc_lim.nb_min,
- dev_info.tx_desc_lim.nb_align);
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ nb_tx_desc, dev_info.tx_desc_lim.nb_max,
+ dev_info.tx_desc_lim.nb_min,
+ dev_info.tx_desc_lim.nb_align);
return -EINVAL;
}
@@ -1706,15 +1607,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
if (tx_conf == NULL)
tx_conf = &dev_info.default_txconf;
- /*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
- */
local_conf = *tx_conf;
- if (!(tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE)) {
- rte_eth_convert_txq_flags(tx_conf->txq_flags,
- &local_conf.offloads);
- }
/*
* If an offloading has already been enabled in
@@ -1736,16 +1629,13 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
*/
if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
local_conf.offloads) {
- ethdev_log(ERR, "Ethdev port_id=%d tx_queue_id=%d, new "
- "added offloads 0x%" PRIx64 " must be "
- "within pre-queue offload capabilities 0x%"
- PRIx64 " in %s()\n",
- port_id,
- tx_queue_id,
- local_conf.offloads,
- dev_info.tx_queue_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
+ "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, tx_queue_id, local_conf.offloads,
+ dev_info.tx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
@@ -2009,19 +1899,19 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (!id) {
- RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "Id pointer is NULL\n");
return -ENOMEM;
}
if (!xstat_name) {
- RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "xstat_name pointer is NULL\n");
return -ENOMEM;
}
/* Get count */
cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL);
if (cnt_xstats < 0) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get count of xstats\n");
return -ENODEV;
}
@@ -2030,7 +1920,7 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
if (cnt_xstats != rte_eth_xstats_get_names_by_id(
port_id, xstats_names, cnt_xstats, NULL)) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get xstats lookup\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get xstats lookup\n");
return -1;
}
@@ -2153,7 +2043,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
sizeof(struct rte_eth_xstat_name));
if (!xstats_names_copy) {
- RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory");
+ RTE_ETHDEV_LOG(ERR, "Can't allocate memory\n");
return -ENOMEM;
}
@@ -2181,7 +2071,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
free(xstats_names_copy);
return -1;
}
@@ -2366,7 +2256,7 @@ rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
return -1;
}
values[i] = xstats[ids[i]].value;
@@ -2456,6 +2346,16 @@ set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx,
dev = &rte_eth_devices[port_id];
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP);
+
+ if (is_rx && (queue_id >= dev->data->nb_rx_queues))
+ return -EINVAL;
+
+ if (!is_rx && (queue_id >= dev->data->nb_tx_queues))
+ return -EINVAL;
+
+ if (stat_idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+ return -EINVAL;
+
return (*dev->dev_ops->queue_stats_mapping_set)
(dev, queue_id, stat_idx, is_rx);
}
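As a usage sketch of the path just hardened, assuming a PMD that implements queue_stats_mapping_set: an out-of-range queue_id or stat_idx now fails up front with -EINVAL instead of reaching the driver. The queue and counter indexes below are hypothetical.

	int ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, 3, 1);
	/* stats for Rx queue 3 then accumulate in q_ipackets[1]/q_ibytes[1] */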
@@ -2495,7 +2395,6 @@ void
rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
{
struct rte_eth_dev *dev;
- struct rte_eth_txconf *txconf;
const struct rte_eth_desc_lim lim = {
.nb_max = UINT16_MAX,
.nb_min = 0,
@@ -2517,9 +2416,6 @@ rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
dev_info->nb_tx_queues = dev->data->nb_tx_queues;
dev_info->dev_flags = &dev->data->dev_flags;
- txconf = &dev_info->default_txconf;
- /* convert offload to txq_flags to support legacy app */
- rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags);
}
int
@@ -2598,13 +2494,14 @@ rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on)
dev = &rte_eth_devices[port_id];
if (!(dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER)) {
- RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: vlan-filtering disabled\n",
+ port_id);
return -ENOSYS;
}
if (vlan_id > 4095) {
- RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n",
- port_id, (unsigned) vlan_id);
+ RTE_ETHDEV_LOG(ERR, "Port_id=%u invalid vlan_id=%u > 4095\n",
+ port_id, vlan_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
@@ -2637,7 +2534,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid rx_queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
@@ -2786,7 +2683,7 @@ rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid send_xon, only 0/1 allowed\n");
return -EINVAL;
}
@@ -2803,7 +2700,7 @@ rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid priority, only 0-7 allowed\n");
return -EINVAL;
}
@@ -2844,7 +2741,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
return -EINVAL;
if (max_rxq == 0) {
- RTE_PMD_DEBUG_TRACE("No receive queue is available\n");
+ RTE_ETHDEV_LOG(ERR, "No receive queue is available\n");
return -EINVAL;
}
@@ -2853,8 +2750,9 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
shift = i % RTE_RETA_GROUP_SIZE;
if ((reta_conf[idx].mask & (1ULL << shift)) &&
(reta_conf[idx].reta[shift] >= max_rxq)) {
- RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds "
- "the maximum rxq index: %u\n", idx, shift,
+ RTE_ETHDEV_LOG(ERR,
+ "reta_conf[%u]->reta[%u]: %u exceeds the maximum rxq index: %u\n",
+ idx, shift,
reta_conf[idx].reta[shift], max_rxq);
return -EINVAL;
}
@@ -2923,11 +2821,11 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
rte_eth_dev_info_get(port_id, &dev_info);
if ((dev_info.flow_type_rss_offloads | rss_conf->rss_hf) !=
dev_info.flow_type_rss_offloads) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: "
- "0x%"PRIx64", valid value: 0x%"PRIx64"\n",
- port_id,
- rss_conf->rss_hf,
- dev_info.flow_type_rss_offloads);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, rss_conf->rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP);
return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
@@ -2955,12 +2853,12 @@ rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -2980,12 +2878,12 @@ rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id,
dev = &rte_eth_devices[port_id];
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -3053,12 +2951,12 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP);
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
if (pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Pool id must be 0-%d\n", ETH_64_POOLS - 1);
return -EINVAL;
}
@@ -3066,7 +2964,7 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
index = get_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
port_id);
return -ENOSPC;
}
@@ -3104,7 +3002,9 @@ rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr)
index = get_mac_addr_index(port_id, addr);
if (index == 0) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: Cannot remove default MAC address\n",
+ port_id);
return -EADDRINUSE;
} else if (index < 0)
return 0; /* Do nothing if address wasn't found */
@@ -3181,7 +3081,7 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
dev = &rte_eth_devices[port_id];
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
@@ -3193,15 +3093,16 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
if (!on) {
- RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not "
- "set in UTA\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: the MAC address was not set in UTA\n",
+ port_id);
return -EINVAL;
}
index = get_hash_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
+ port_id);
return -ENOSPC;
}
}
@@ -3249,14 +3150,15 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
link = dev->data->dev_link;
if (queue_idx > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: "
- "invalid queue id=%d\n", port_id, queue_idx);
+ RTE_ETHDEV_LOG(ERR,
+ "Set queue rate limit:port %u: invalid queue id=%u\n",
+ port_id, queue_idx);
return -EINVAL;
}
if (tx_rate > link.link_speed) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, "
- "bigger than link speed= %d\n",
+ RTE_ETHDEV_LOG(ERR,
+ "Set queue rate limit:invalid tx_rate=%u, bigger than link speed= %d\n",
tx_rate, link.link_speed);
return -EINVAL;
}
@@ -3275,26 +3177,28 @@ rte_eth_mirror_rule_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (mirror_conf->rule_type == 0) {
- RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR, "Mirror rule type cannot be 0\n");
return -EINVAL;
}
if (mirror_conf->dst_pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n",
- ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Invalid dst pool, pool id must be 0-%d\n",
+ ETH_64_POOLS - 1);
return -EINVAL;
}
if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP |
ETH_MIRROR_VIRTUAL_POOL_DOWN)) &&
(mirror_conf->pool_mask == 0)) {
- RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid mirror pool, pool mask can not be 0\n");
return -EINVAL;
}
if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) &&
mirror_conf->vlan.vlan_mask == 0) {
- RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid vlan mask, vlan mask can not be 0\n");
return -EINVAL;
}
@@ -3341,7 +3245,7 @@ rte_eth_dev_callback_register(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- ethdev_log(ERR, "Invalid port_id=%d", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3404,7 +3308,7 @@ rte_eth_dev_callback_unregister(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- ethdev_log(ERR, "Invalid port_id=%d", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3498,13 +3402,13 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
dev = &rte_eth_devices[port_id];
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
@@ -3512,9 +3416,9 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
vec = intr_handle->intr_vec[qid];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, qid, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, qid, op, epfd, vec);
}
}
@@ -3649,27 +3553,27 @@ rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
vec = intr_handle->intr_vec[queue_id];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, queue_id, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, queue_id, op, epfd, vec);
return rc;
}
@@ -3936,7 +3840,7 @@ rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3952,7 +3856,6 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_txq_info *qinfo)
{
struct rte_eth_dev *dev;
- struct rte_eth_txconf *txconf = &qinfo->conf;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
@@ -3961,7 +3864,7 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3969,8 +3872,6 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
memset(qinfo, 0, sizeof(*qinfo));
dev->dev_ops->txq_info_get(dev, queue_id, qinfo);
- /* convert offload to txq_flags to support legacy app */
- rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags);
return 0;
}
@@ -4178,12 +4079,12 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -4205,17 +4106,17 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
if (mask == 0) {
- RTE_PMD_DEBUG_TRACE("Mask should have a value.\n");
+ RTE_ETHDEV_LOG(ERR, "Mask should have a value\n");
return -EINVAL;
}
@@ -4516,11 +4417,9 @@ parse_cleanup:
return result;
}
-RTE_INIT(ethdev_init_log);
-static void
-ethdev_init_log(void)
+RTE_INIT(ethdev_init_log)
{
- ethdev_logtype = rte_log_register("lib.ethdev");
- if (ethdev_logtype >= 0)
- rte_log_set_level(ethdev_logtype, RTE_LOG_INFO);
+ rte_eth_dev_logtype = rte_log_register("lib.ethdev");
+ if (rte_eth_dev_logtype >= 0)
+ rte_log_set_level(rte_eth_dev_logtype, RTE_LOG_INFO);
}
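This hunk follows the reworked RTE_INIT() macro, which now declares the constructor and opens its body in one step, replacing the old prototype-plus-static-function pattern removed above. A generic sketch of the new form, with a hypothetical logtype variable and log name:

	static int my_logtype;

	RTE_INIT(my_init_log)
	{
		my_logtype = rte_log_register("lib.example");
		if (my_logtype >= 0)
			rte_log_set_level(my_logtype, RTE_LOG_INFO);
	}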
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984e..7070e9ab 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -159,6 +159,11 @@ extern "C" {
#include "rte_eth_ctrl.h"
#include "rte_dev_info.h"
+extern int rte_eth_dev_logtype;
+
+#define RTE_ETHDEV_LOG(level, ...) \
+ rte_log(RTE_LOG_ ## level, rte_eth_dev_logtype, "" __VA_ARGS__)
+
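The empty string literal prepended to __VA_ARGS__ forces the first variadic argument to be a string literal (adjacent-string concatenation fails otherwise), so the compiler can still check the format arguments. A typical call from this patch and its expansion:

	RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id);
	/* expands to:
	 * rte_log(RTE_LOG_ERR, rte_eth_dev_logtype,
	 *         "" "Invalid port_id=%u\n", port_id);
	 */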
struct rte_mbuf;
/**
@@ -321,7 +326,7 @@ enum rte_eth_tx_mq_mode {
struct rte_eth_rxmode {
/** The multi-queue packet distribution mode to be used, e.g. RSS. */
enum rte_eth_rx_mq_mode mq_mode;
- uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
+ uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
/**
* Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags.
@@ -329,33 +334,6 @@ struct rte_eth_rxmode {
* structure are allowed to be set.
*/
uint64_t offloads;
- __extension__
- /**
- * Below bitfield API is obsolete. Application should
- * enable per-port offloads using the offload field
- * above.
- */
- uint16_t header_split : 1, /**< Header Split enable. */
- hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
- hw_vlan_filter : 1, /**< VLAN filter enable. */
- hw_vlan_strip : 1, /**< VLAN strip enable. */
- hw_vlan_extend : 1, /**< Extended VLAN enable. */
- jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
- hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
- enable_scatter : 1, /**< Enable scatter packets rx handler */
- enable_lro : 1, /**< Enable LRO */
- hw_timestamp : 1, /**< Enable HW timestamp */
- security : 1, /**< Enable rte_security offloads */
- /**
- * When set the offload bitfield should be ignored.
- * Instead per-port Rx offloads should be set on offloads
- * field above.
- * Per-queue offloads shuold be set on rte_eth_rxq_conf
- * structure.
- * This bit is temporary till rxmode bitfield offloads API will
- * be deprecated.
- */
- ignore_offload_bitfield : 1;
};
/**
@@ -702,28 +680,6 @@ struct rte_eth_rxconf {
uint64_t offloads;
};
-#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
-#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */
-#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */
-#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */
-#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */
-#define ETH_TXQ_FLAGS_NOOFFLOADS \
- (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
- ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP)
-#define ETH_TXQ_FLAGS_NOXSUMS \
- (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
- ETH_TXQ_FLAGS_NOXSUMTCP)
-/**
- * When set the txq_flags should be ignored,
- * instead per-queue Tx offloads will be set on offloads field
- * located on rte_eth_txq_conf struct.
- * This flag is temporary till the rte_eth_txq_conf.txq_flags
- * API will be deprecated.
- */
-#define ETH_TXQ_FLAGS_IGNORE 0x8000
-
/**
* A structure used to configure a TX ring of an Ethernet port.
*/
@@ -733,7 +689,6 @@ struct rte_eth_txconf {
uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are
less free descriptors than this value. */
- uint32_t txq_flags; /**< Set flags for the Tx queue */
uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
/**
* Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags.
@@ -939,6 +894,12 @@ struct rte_eth_conf {
#define DEV_RX_OFFLOAD_SCATTER 0x00002000
#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000
#define DEV_RX_OFFLOAD_SECURITY 0x00008000
+
+/**
+ * It is invalid to set both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC;
+ * the absence of the DEV_RX_OFFLOAD_CRC_STRIP flag means the CRC is kept.
+ */
+#define DEV_RX_OFFLOAD_KEEP_CRC 0x00010000
#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \
DEV_RX_OFFLOAD_UDP_CKSUM | \
DEV_RX_OFFLOAD_TCP_CKSUM)
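A configuration sketch for the new flag, assuming a stopped port whose rx_offload_capa advertises KEEP_CRC; per the check added to rte_eth_dev_configure() above, combining it with DEV_RX_OFFLOAD_CRC_STRIP now fails with -EINVAL:

	struct rte_eth_conf conf = { 0 };

	conf.rxmode.offloads = DEV_RX_OFFLOAD_KEEP_CRC; /* CRC left in mbuf data */
	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	/* adding DEV_RX_OFFLOAD_CRC_STRIP to offloads here would yield -EINVAL */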
@@ -1003,8 +964,6 @@ struct rte_eth_conf {
* mentioned in rte_tx_offload_names in rte_ethdev.c file.
*/
-struct rte_pci_device;
-
/*
* Fallback default preferred Rx/Tx port parameters.
* These are used if an application requests default parameters
@@ -1194,14 +1153,14 @@ struct rte_eth_dcb_info {
/* Macros to check for valid port */
#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return retval; \
} \
} while (0)
#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return; \
} \
} while (0)
@@ -1472,6 +1431,7 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);
* @return
* 0 on success and port_id is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
/**
@@ -1487,6 +1447,7 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
* @return
* 0 on success and devname is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_detach(uint16_t port_id, char *devname);
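Both helpers are now tagged __rte_deprecated; the replacement is the devargs/hotplug pair that rte_eth_dev_attach() itself uses earlier in this patch. A sketch with a hypothetical PCI address:

	struct rte_devargs da;

	if (rte_devargs_parse(&da, "0000:02:00.0") == 0)
		rte_eal_hotplug_add(da.bus->name, da.name, da.args);
	/* ports created by the probe can then be found via
	 * RTE_ETH_FOREACH_DEV or rte_eth_dev_get_port_by_name() */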
/**
@@ -1554,9 +1515,11 @@ const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload);
* the [rt]x_offload_capa returned from rte_eth_dev_infos_get().
* Any type of device supported offloading set in the input argument
* eth_conf->[rt]xmode.offloads to rte_eth_dev_configure() is enabled
- * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup().
- * - the Receive Side Scaling (RSS) configuration when using multiple RX
- * queues per port.
+ * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup()
+ * - the Receive Side Scaling (RSS) configuration when using multiple RX
+ * queues per port. Any RSS hash function set in eth_conf->rx_adv_conf.rss_conf.rss_hf
+ * must be within the flow_type_rss_offloads provided by drivers via
+ * rte_eth_dev_infos_get() API.
*
* Embedding all configuration information in a single data structure
* is the more flexible method that allows the addition of new features
@@ -1669,12 +1632,6 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
* The *tx_rs_thresh* value should be less than or equal to the
* *tx_free_thresh* value, and both of them should be less than
* *nb_tx_desc* - 3.
- * - The *txq_flags* member contains flags to pass to the TX queue setup
- * function to configure the behavior of the TX queue. This should be set
- * to 0 if no special configuration is required.
- * This API is obsolete and will be deprecated. Applications
- * should set it to ETH_TXQ_FLAGS_IGNORE and use
- * the offloads field below.
* - The *offloads* member contains Tx offloads to be enabled.
* If an offloading set in tx_conf->offloads
* hasn't been set in the input argument eth_conf->txmode.offloads
@@ -2003,6 +1960,15 @@ int rte_eth_stats_reset(uint16_t port_id);
/**
* Retrieve names of extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ * xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the 'id' field of
+ * 'struct rte_eth_xstat': xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats_names
@@ -2027,13 +1993,20 @@ int rte_eth_xstats_get_names(uint16_t port_id,
/**
* Retrieve extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ * xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the 'id' field of
+ * 'struct rte_eth_xstat': xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats
* A pointer to a table of structure of type *rte_eth_xstat*
- * to be filled with device statistics ids and values: id is the
- * index of the name string in xstats_names (see rte_eth_xstats_get_names()),
- * and value is the statistic counter.
+ * to be filled with device statistics ids and values.
* This parameter can be set to NULL if n is 0.
* @param n
* The size of the xstats array (number of elements).
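A retrieval sketch matching the index contract documented above (allocation checks and the <stdlib.h>/<inttypes.h> includes are omitted, and the counts are assumed stable between the two calls):

	int n = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names = calloc(n, sizeof(*names));
	struct rte_eth_xstat *xstats = calloc(n, sizeof(*xstats));

	rte_eth_xstats_get_names(port_id, names, n);
	rte_eth_xstats_get(port_id, xstats, n);
	for (int i = 0; i < n; i++)	/* xstats[i].id == i by the contract */
		printf("%s: %" PRIu64 "\n", names[i].name, xstats[i].value);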
@@ -2144,7 +2117,7 @@ void rte_eth_xstats_reset(uint16_t port_id);
* @param stat_idx
* The per-queue packet statistics functionality number that the transmit
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -2164,7 +2137,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
* @param stat_idx
* The per-queue packet statistics functionality number that the receive
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -3656,11 +3629,11 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
uint8_t en);
/**
-* Get the port id from pci address or device name
-* Example:
-* - PCIe, 0000:2:00.0
-* - SoC, fsl-gmac0
-* - vdev, net_pcap0
+* Get the port id from the device name. The device name should be specified
+* in one of the following formats:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:2:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev dpdk name, for example: net_[pcap0|null0|tap0]
*
* @param name
* pci address or name of the device
@@ -3674,11 +3647,10 @@ int
rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id);
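Usage is the same for all three name formats; a minimal sketch (the PCI
address below is illustrative):

	uint16_t port_id;

	if (rte_eth_dev_get_port_by_name("0000:02:00.0", &port_id) == 0)
		printf("device is port %u\n", port_id);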
/**
-* Get the device name from port id
-* Example:
-* - PCIe Bus:Domain:Function, 0000:02:00.0
-* - SoC device name, fsl-gmac0
-* - vdev dpdk name, net_[pcap0|null0|tun0|tap0]
+* Get the device name from a port id. The device name is returned in one of
+* the following formats:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:02:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev dpdk name, for example: net_[pcap0|null0|tun0|tap0]
*
* @param port_id
* Port identifier of the device.
@@ -3837,7 +3809,7 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return 0;
}
#endif
@@ -4070,7 +4042,7 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
*
* If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can
* invoke this function concurrently on the same tx queue without SW lock.
- * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
+ * @see rte_eth_dev_info_get, struct rte_eth_txconf::offloads
*
* @see rte_eth_tx_prepare to perform some prior checks or adjustments
* for offloads.
@@ -4103,7 +4075,7 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return 0;
}
#endif
@@ -4189,7 +4161,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (!rte_eth_dev_is_valid_port(port_id)) {
- RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
rte_errno = -EINVAL;
return 0;
}
@@ -4199,7 +4171,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
rte_errno = -EINVAL;
return 0;
}
diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3..c6d9bc1a 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -38,7 +38,6 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
* to that slot for the driver to use.
*
* @param name Unique identifier name for each Ethernet device
- * @param type Device type of this Ethernet device
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
@@ -325,6 +324,32 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev);
int __rte_experimental
rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit);
+/**
+ * PMD helper function to check if keeping CRC is requested
+ *
+ * @note
+ * When the CRC_STRIP offload flag is removed and the default behavior
+ * switches to stripping CRC, as planned, this helper function will no longer
+ * be useful and will be removed. In PMDs this function will be replaced
+ * with the check:
+ * if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+ *
+ * @param rx_offloads
+ * offload bits to be applied
+ *
+ * @return
+ * Return positive if keeping CRC is requested,
+ * zero if stripping CRC is requested
+ */
+static inline int
+rte_eth_dev_must_keep_crc(uint64_t rx_offloads)
+{
+ if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
+ return 0;
+
+ /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */
+ return 1;
+}
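A hypothetical PMD fragment using the helper at configure time ('dev' and
'hw' are illustrative names, not from this patch):

	/* decide whether the NIC should strip the CRC on receive */
	hw->strip_crc = !rte_eth_dev_must_keep_crc(
				dev->data->dev_conf.rxmode.offloads);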
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h
index 2cfd3727..f652596f 100644
--- a/lib/librte_ethdev/rte_ethdev_pci.h
+++ b/lib/librte_ethdev/rte_ethdev_pci.h
@@ -53,8 +53,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
struct rte_pci_device *pci_dev)
{
if ((eth_dev == NULL) || (pci_dev == NULL)) {
- RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n",
- eth_dev, pci_dev);
+ RTE_ETHDEV_LOG(ERR, "NULL pointer eth_dev=%p pci_dev=%p",
+ (void *)eth_dev, (void *)pci_dev);
return;
}
diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
index 40cf42b8..38f117f0 100644
--- a/lib/librte_ethdev/rte_ethdev_version.map
+++ b/lib/librte_ethdev/rte_ethdev_version.map
@@ -213,6 +213,13 @@ DPDK_18.05 {
} DPDK_18.02;
+DPDK_18.08 {
+ global:
+
+ rte_eth_dev_logtype;
+
+} DPDK_18.05;
+
EXPERIMENTAL {
global:
@@ -232,6 +239,7 @@ EXPERIMENTAL {
rte_eth_dev_tx_offload_name;
rte_eth_switch_domain_alloc;
rte_eth_switch_domain_free;
+ rte_flow_expand_rss;
rte_mtr_capabilities_get;
rte_mtr_create;
rte_mtr_destroy;
diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index b2afba08..cff4b520 100644
--- a/lib/librte_ethdev/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -84,7 +84,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
MK_FLOW_ACTION(FLAG, 0),
MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
MK_FLOW_ACTION(DROP, 0),
- MK_FLOW_ACTION(COUNT, 0),
+ MK_FLOW_ACTION(COUNT, sizeof(struct rte_flow_action_count)),
MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)),
MK_FLOW_ACTION(PF, 0),
MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
@@ -526,3 +526,110 @@ store:
}
return 0;
}
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index)
+{
+ const int elt_n = 8;
+ const struct rte_flow_item *item;
+ const struct rte_flow_expand_node *node = &graph[graph_root_index];
+ const int *next_node;
+ const int *stack[elt_n];
+ int stack_pos = 0;
+ struct rte_flow_item flow_items[elt_n];
+ unsigned int i;
+ size_t lsize;
+ size_t user_pattern_size = 0;
+ void *addr = NULL;
+
+ lsize = offsetof(struct rte_flow_expand_rss, entry) +
+ elt_n * sizeof(buf->entry[0]);
+ if (lsize <= size) {
+ buf->entry[0].priority = 0;
+ buf->entry[0].pattern = (void *)&buf->entry[elt_n];
+ buf->entries = 0;
+ addr = buf->entry[0].pattern;
+ }
+ for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+ const struct rte_flow_expand_node *next = NULL;
+
+ for (i = 0; node->next && node->next[i]; ++i) {
+ next = &graph[node->next[i]];
+ if (next->type == item->type)
+ break;
+ }
+ if (next)
+ node = next;
+ user_pattern_size += sizeof(*item);
+ }
+ user_pattern_size += sizeof(*item); /* Handle END item. */
+ lsize += user_pattern_size;
+ /* Copy the user pattern in the first entry of the buffer. */
+ if (lsize <= size) {
+ rte_memcpy(addr, pattern, user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) + user_pattern_size);
+ buf->entries = 1;
+ }
+ /* Start expanding. */
+ memset(flow_items, 0, sizeof(flow_items));
+ user_pattern_size -= sizeof(*item);
+ next_node = node->next;
+ stack[stack_pos] = next_node;
+ node = next_node ? &graph[*next_node] : NULL;
+ while (node) {
+ flow_items[stack_pos].type = node->type;
+ if (node->rss_types & types) {
+ /*
+			 * Compute the number of pattern items to copy from
+			 * the expansion and copy them.
+			 * When stack_pos is 0, the stack holds 1 element,
+			 * plus the additional END item.
+ */
+ int elt = stack_pos + 2;
+
+ flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
+ lsize += elt * sizeof(*item) + user_pattern_size;
+ if (lsize <= size) {
+ size_t n = elt * sizeof(*item);
+
+ buf->entry[buf->entries].priority =
+ stack_pos + 1;
+ buf->entry[buf->entries].pattern = addr;
+ buf->entries++;
+ rte_memcpy(addr, buf->entry[0].pattern,
+ user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) +
+ user_pattern_size);
+ rte_memcpy(addr, flow_items, n);
+ addr = (void *)(((uintptr_t)addr) + n);
+ }
+ }
+ /* Go deeper. */
+ if (node->next) {
+ next_node = node->next;
+			/* reserve room for the trailing END item */
+			if (stack_pos++ == elt_n - 2) {
+ rte_errno = E2BIG;
+ return -rte_errno;
+ }
+ stack[stack_pos] = next_node;
+ } else if (*(next_node + 1)) {
+ /* Follow up with the next possibility. */
+ ++next_node;
+ } else {
+ /* Move to the next path. */
+ if (stack_pos)
+ next_node = stack[--stack_pos];
+ next_node++;
+ stack[stack_pos] = next_node;
+ }
+ node = *next_node ? &graph[*next_node] : NULL;
+	}
+ return lsize;
+}
diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
index 1c90c600..688f7230 100644
--- a/lib/librte_ethdev/rte_flow_driver.h
+++ b/lib/librte_ethdev/rte_flow_driver.h
@@ -114,6 +114,69 @@ struct rte_flow_ops {
const struct rte_flow_ops *
rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
+/** Helper macro to build input graph for rte_flow_expand_rss(). */
+#define RTE_FLOW_EXPAND_RSS_NEXT(...) \
+ (const int []){ \
+ __VA_ARGS__, 0, \
+ }
+
+/** Node object of input graph for rte_flow_expand_rss(). */
+struct rte_flow_expand_node {
+ const int *const next;
+ /**<
+ * List of next node indexes. Index 0 is interpreted as a terminator.
+ */
+ const enum rte_flow_item_type type;
+ /**< Pattern item type of current node. */
+ uint64_t rss_types;
+ /**<
+ * RSS types bit-field associated with this node
+ * (see ETH_RSS_* definitions).
+ */
+};
+
+/** Object returned by rte_flow_expand_rss(). */
+struct rte_flow_expand_rss {
+ uint32_t entries;
+	/**< Number of valid entries in @p entry[]. */
+ struct {
+ struct rte_flow_item *pattern; /**< Expanded pattern array. */
+ uint32_t priority; /**< Priority offset for each expansion. */
+ } entry[];
+};
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ *
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @param[out] buf
+ * Buffer to store the result expansion.
+ * @param[in] size
+ * Buffer size in bytes. If 0, @p buf can be NULL.
+ * @param[in] pattern
+ * User flow pattern.
+ * @param[in] types
+ * RSS types to expand (see ETH_RSS_* definitions).
+ * @param[in] graph
+ * Input graph to expand @p pattern according to @p types.
+ * @param[in] graph_root_index
+ * Index of root node in @p graph, typically 0.
+ *
+ * @return
+ * A positive value representing the size of @p buf in bytes regardless of
+ * @p size on success, a negative errno value otherwise and rte_errno is
+ * set, the following errors are defined:
+ *
+ *   -E2BIG: @p graph is too deep.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index);
+
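A hedged sketch of building an input graph and expanding a pattern with it
(node indices, the 'pattern' item array and the buffer sizing are
illustrative; error and alignment handling omitted):

	enum { NODE_ETH, NODE_IPV4, NODE_UDP, NODE_TCP };

	static const struct rte_flow_expand_node graph[] = {
		[NODE_ETH] = {
			.next = RTE_FLOW_EXPAND_RSS_NEXT(NODE_IPV4),
			.type = RTE_FLOW_ITEM_TYPE_ETH,
		},
		[NODE_IPV4] = {
			.next = RTE_FLOW_EXPAND_RSS_NEXT(NODE_UDP, NODE_TCP),
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.rss_types = ETH_RSS_IPV4,
		},
		[NODE_UDP] = {
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
		},
		[NODE_TCP] = {
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
		},
	};
	uint8_t raw[4096];
	struct rte_flow_expand_rss *buf = (void *)raw;
	/* 'pattern' is the application's item array, END-terminated */
	int ret = rte_flow_expand_rss(buf, sizeof(raw), pattern,
				      ETH_RSS_NONFRAG_IPV4_UDP,
				      graph, NODE_ETH);

On success, buf->entry[0..buf->entries - 1] hold the expanded patterns in
increasing priority offset order.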
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h
index 72554038..955f02ff 100644
--- a/lib/librte_ethdev/rte_tm.h
+++ b/lib/librte_ethdev/rte_tm.h
@@ -1569,6 +1569,10 @@ rte_tm_hierarchy_commit(uint16_t port_id,
/**
* Traffic manager node parent update
*
+ * This function may be used to move a node and its children to a different
+ * parent. Additionally, if the new parent is the same as the current parent,
+ * this function will update the priority/weight of an existing node.
+ *
* Restriction for root node: its parent cannot be changed.
*
* This function can only be called after the rte_tm_hierarchy_commit()
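For the priority/weight update case, a hedged usage sketch (node and parent
IDs are assumed to come from the application's earlier hierarchy setup):

	struct rte_tm_error error;

	/* same parent_node_id as before: only priority/weight change */
	if (rte_tm_node_parent_update(port_id, node_id, parent_node_id,
				      new_priority, new_weight, &error) != 0)
		printf("parent update: %s\n",
		       error.message ? error.message : "unknown error");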
diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile
index b3e25464..47f599a6 100644
--- a/lib/librte_eventdev/Makefile
+++ b/lib/librte_eventdev/Makefile
@@ -8,14 +8,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eventdev.a
# library version
-LIBABIVER := 4
+LIBABIVER := 5
# build flags
CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+CFLAGS += -DLINUX
+else
+CFLAGS += -DBSD
+endif
LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash -lrte_mempool -lrte_timer
-LDLIBS += -lrte_mbuf -lrte_cryptodev
+LDLIBS += -lrte_mbuf -lrte_cryptodev -lpthread
# library source files
SRCS-y += rte_eventdev.c
diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build
index bd138bd5..3cbaf298 100644
--- a/lib/librte_eventdev/meson.build
+++ b/lib/librte_eventdev/meson.build
@@ -1,8 +1,15 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 4
+version = 5
allow_experimental_apis = true
+
+if host_machine.system() == 'linux'
+ cflags += '-DLINUX'
+else
+ cflags += '-DBSD'
+endif
+
sources = files('rte_eventdev.c',
'rte_event_ring.c',
'rte_event_eth_rx_adapter.c',
diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.c b/lib/librte_eventdev/rte_event_crypto_adapter.c
index ba63a87b..11b28ca9 100644
--- a/lib/librte_eventdev/rte_event_crypto_adapter.c
+++ b/lib/librte_eventdev/rte_event_crypto_adapter.c
@@ -342,7 +342,7 @@ eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter,
if (crypto_op == NULL)
continue;
if (crypto_op->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
- m_data = rte_cryptodev_sym_session_get_private_data(
+ m_data = rte_cryptodev_sym_session_get_user_data(
crypto_op->sym->session);
if (m_data == NULL) {
rte_pktmbuf_free(crypto_op->sym->m_src);
@@ -512,7 +512,7 @@ eca_ops_enqueue_burst(struct rte_event_crypto_adapter *adapter,
for (i = 0; i < num; i++) {
struct rte_event *ev = &events[nb_ev++];
if (ops[i]->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
- m_data = rte_cryptodev_sym_session_get_private_data(
+ m_data = rte_cryptodev_sym_session_get_user_data(
ops[i]->sym->session);
} else if (ops[i]->sess_type == RTE_CRYPTO_OP_SESSIONLESS &&
ops[i]->private_data_offset) {
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 6f705095..f5e5a0b5 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -2,6 +2,11 @@
* Copyright(c) 2017 Intel Corporation.
* All rights reserved.
*/
+#if defined(LINUX)
+#include <sys/epoll.h>
+#endif
+#include <unistd.h>
+
#include <rte_cycles.h>
#include <rte_common.h>
#include <rte_dev.h>
@@ -11,6 +16,7 @@
#include <rte_malloc.h>
#include <rte_service_component.h>
#include <rte_thash.h>
+#include <rte_interrupts.h>
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
@@ -24,6 +30,22 @@
#define ETH_RX_ADAPTER_MEM_NAME_LEN 32
#define RSS_KEY_SIZE 40
+/* value written to intr thread pipe to signal thread exit */
+#define ETH_BRIDGE_INTR_THREAD_EXIT 1
+/* Sentinel value to detect an uninitialized file descriptor */
+#define INIT_FD -1
+
+/*
+ * Used to store port and queue ID of interrupting Rx queue
+ */
+union queue_data {
+ RTE_STD_C11
+ void *ptr;
+ struct {
+ uint16_t port;
+ uint16_t queue;
+ };
+};
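Because the union is pointer-sized, the <port, queue> pair travels through
the epoll user data with no allocation; a sketch of the registration side,
mirroring rxa_config_intr() further below ('epfd' assumed):

	union queue_data qd;

	qd.port = eth_dev_id;
	qd.queue = rx_queue_id;
	rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id, epfd,
				  RTE_INTR_EVENT_ADD, qd.ptr);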
/*
* There is an instance of this struct per polled Rx queue added to the
@@ -75,6 +97,30 @@ struct rte_event_eth_rx_adapter {
uint16_t enq_block_count;
/* Block start ts */
uint64_t rx_enq_block_start_ts;
+ /* epoll fd used to wait for Rx interrupts */
+ int epd;
+	/* Number of interrupt driven queues */
+ uint32_t num_rx_intr;
+ /* Used to send <dev id, queue id> of interrupting Rx queues from
+ * the interrupt thread to the Rx thread
+ */
+ struct rte_ring *intr_ring;
+ /* Rx Queue data (dev id, queue id) for the last non-empty
+ * queue polled
+ */
+ union queue_data qd;
+ /* queue_data is valid */
+ int qd_valid;
+ /* Interrupt ring lock, synchronizes Rx thread
+ * and interrupt thread
+ */
+ rte_spinlock_t intr_ring_lock;
+ /* event array passed to rte_poll_wait */
+ struct rte_epoll_event *epoll_events;
+ /* Count of interrupt vectors in use */
+ uint32_t num_intr_vec;
+ /* Thread blocked on Rx interrupts */
+ pthread_t rx_intr_thread;
/* Configuration callback for rte_service configuration */
rte_event_eth_rx_adapter_conf_cb conf_cb;
/* Configuration callback argument */
@@ -91,12 +137,20 @@ struct rte_event_eth_rx_adapter {
int socket_id;
/* Per adapter EAL service */
uint32_t service_id;
+ /* Adapter started flag */
+ uint8_t rxa_started;
+ /* Adapter ID */
+ uint8_t id;
} __rte_cache_aligned;
/* Per eth device */
struct eth_device_info {
struct rte_eth_dev *dev;
struct eth_rx_queue_info *rx_queue;
+ /* Rx callback */
+ rte_event_eth_rx_adapter_cb_fn cb_fn;
+ /* Rx callback argument */
+ void *cb_arg;
/* Set if ethdev->eventdev packet transfer uses a
* hardware mechanism
*/
@@ -107,15 +161,42 @@ struct eth_device_info {
* rx_adapter_stop callback needs to be invoked
*/
uint8_t dev_rx_started;
- /* If nb_dev_queues > 0, the start callback will
+ /* Number of queues added for this device */
+ uint16_t nb_dev_queues;
+ /* Number of poll based queues
+ * If nb_rx_poll > 0, the start callback will
* be invoked if not already invoked
*/
- uint16_t nb_dev_queues;
+ uint16_t nb_rx_poll;
+ /* Number of interrupt based queues
+ * If nb_rx_intr > 0, the start callback will
+ * be invoked if not already invoked.
+ */
+ uint16_t nb_rx_intr;
+ /* Number of queues that use the shared interrupt */
+ uint16_t nb_shared_intr;
+ /* sum(wrr(q)) for all queues within the device
+ * useful when deleting all device queues
+ */
+ uint32_t wrr_len;
+ /* Intr based queue index to start polling from, this is used
+ * if the number of shared interrupts is non-zero
+ */
+ uint16_t next_q_idx;
+ /* Intr based queue indices */
+ uint16_t *intr_queue;
+	/* Device generates a per Rx queue interrupt for
+	 * queue indices < RTE_MAX_RXTX_INTR_VEC_ID - 1
+ */
+ int multi_intr_cap;
+ /* shared interrupt enabled */
+ int shared_intr_enabled;
};
/* Per Rx queue */
struct eth_rx_queue_info {
int queue_enabled; /* True if added */
+ int intr_enabled;
uint16_t wt; /* Polling weight */
uint8_t event_queue_id; /* Event queue to enqueue packets to */
uint8_t sched_type; /* Sched type for events */
@@ -127,30 +208,30 @@ struct eth_rx_queue_info {
static struct rte_event_eth_rx_adapter **event_eth_rx_adapter;
static inline int
-valid_id(uint8_t id)
+rxa_validate_id(uint8_t id)
{
return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE;
}
#define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \
- if (!valid_id(id)) { \
+ if (!rxa_validate_id(id)) { \
RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \
return retval; \
} \
} while (0)
static inline int
-sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_sw_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
{
- return rx_adapter->num_rx_polled;
+ return rx_adapter->num_rx_polled + rx_adapter->num_rx_intr;
}
/* Greatest common divisor */
-static uint16_t gcd_u16(uint16_t a, uint16_t b)
+static uint16_t rxa_gcd_u16(uint16_t a, uint16_t b)
{
uint16_t r = a % b;
- return r ? gcd_u16(b, r) : b;
+ return r ? rxa_gcd_u16(b, r) : b;
}
/* Returns the next queue in the polling sequence
@@ -158,7 +239,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
* http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling
*/
static int
-wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
unsigned int n, int *cw,
struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt,
uint16_t gcd, int prev)
@@ -186,13 +267,298 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
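For reference, the interleaved WRR cited above can be sketched standalone
(this is the generic LVS algorithm, not the adapter's exact helper, which
additionally indexes into its poll table):

	/* With weights {4, 2, 1}, gcd 1 and max_wt 4, repeated calls
	 * yield the sequence 0 0 0 1 0 1 2, then repeat.
	 */
	static int
	wrr_next_sketch(const uint16_t w[], int n, int *cw,
			uint16_t max_wt, uint16_t gcd, int prev)
	{
		int i = prev;

		while (1) {
			i = (i + 1) % n;
			if (i == 0) {
				*cw -= gcd;
				if (*cw <= 0)
					*cw = max_wt;
			}
			if (w[i] >= *cw)
				return i;
		}
	}

Callers start with prev = -1 and *cw = -1 (as the patch does below) so the
first pass resets the current weight to max_wt.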
-/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static inline int
+rxa_shared_intr(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int multi_intr_cap;
+
+ if (dev_info->dev->intr_handle == NULL)
+ return 0;
+
+ multi_intr_cap = rte_intr_cap_multiple(dev_info->dev->intr_handle);
+ return !multi_intr_cap ||
+ rx_queue_id >= RTE_MAX_RXTX_INTR_VEC_ID - 1;
+}
+
+static inline int
+rxa_intr_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return dev_info->rx_queue &&
+ !dev_info->internal_event_port &&
+ queue_info->queue_enabled && queue_info->wt == 0;
+}
+
+static inline int
+rxa_polled_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return !dev_info->internal_event_port &&
+ dev_info->rx_queue &&
+ queue_info->queue_enabled && queue_info->wt != 0;
+}
+
+/* Calculate change in number of vectors after Rx queue ID is added/deleted */
static int
-eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_nb_intr_vect(struct eth_device_info *dev_info, int rx_queue_id, int add)
+{
+ uint16_t i;
+ int n, s;
+ uint16_t nbq;
+
+ nbq = dev_info->dev->data->nb_rx_queues;
+ n = 0; /* non shared count */
+ s = 0; /* shared count */
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < nbq; i++) {
+ if (!rxa_shared_intr(dev_info, i))
+ n += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ else
+ s += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ }
+
+ if (s > 0) {
+ if ((add && dev_info->nb_shared_intr == 0) ||
+ (!add && dev_info->nb_shared_intr))
+ n += 1;
+ }
+ } else {
+ if (!rxa_shared_intr(dev_info, rx_queue_id))
+ n = add ? !rxa_intr_queue(dev_info, rx_queue_id) :
+ rxa_intr_queue(dev_info, rx_queue_id);
+ else
+ n = add ? !dev_info->nb_shared_intr :
+ dev_info->nb_shared_intr == 1;
+ }
+
+ return add ? n : -n;
+}
+
+/* Calculate nb_rx_intr after deleting interrupt mode rx queues
+ */
+static void
+rxa_calc_nb_post_intr_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_intr)
+{
+ uint32_t intr_diff;
+
+ if (rx_queue_id == -1)
+ intr_diff = dev_info->nb_rx_intr;
+ else
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+}
+
+/* Calculate nb_rx_* after adding interrupt mode rx queues, newly added
+ * interrupt queues could currently be poll mode Rx queues
+ */
+static void
+rxa_calc_nb_post_add_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_intr;
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ intr_diff = !rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_intr = rx_adapter->num_rx_intr + intr_diff;
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate size of the eth_rx_poll and wrr_sched arrays
+ * after deleting poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_poll_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_wrr)
+{
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_add_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->nb_rx_intr;
+ poll_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_poll;
+ wrr_len_diff = wt*dev_info->dev->data->nb_rx_queues
+ - dev_info->wrr_len;
+ } else {
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = !rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = rxa_polled_queue(dev_info, rx_queue_id) ?
+ wt - dev_info->rx_queue[rx_queue_id].wt :
+ wt;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled + poll_diff;
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+ *nb_wrr = rx_adapter->wrr_len + wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding rx_queue_id */
+static void
+rxa_calc_nb_post_add(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ if (wt != 0)
+ rxa_calc_nb_post_add_poll(rx_adapter, dev_info, rx_queue_id,
+ wt, nb_rx_poll, nb_rx_intr, nb_wrr);
+ else
+ rxa_calc_nb_post_add_intr(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_poll, nb_rx_intr, nb_wrr);
+}
+
+/* Calculate nb_rx_* after deleting rx_queue_id */
+static void
+rxa_calc_nb_post_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ rxa_calc_nb_post_poll_del(rx_adapter, dev_info, rx_queue_id, nb_rx_poll,
+ nb_wrr);
+ rxa_calc_nb_post_intr_del(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_intr);
+}
+
+/*
+ * Allocate the rx_poll array
+ */
+static struct eth_rx_poll_entry *
+rxa_alloc_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_rx_polled)
+{
+ size_t len;
+
+ len = RTE_ALIGN(num_rx_polled * sizeof(*rx_adapter->eth_rx_poll),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+/*
+ * Allocate the WRR array
+ */
+static uint32_t *
+rxa_alloc_wrr(struct rte_event_eth_rx_adapter *rx_adapter, int nb_wrr)
+{
+ size_t len;
+
+ len = RTE_ALIGN(nb_wrr * sizeof(*rx_adapter->wrr_sched),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+static int
+rxa_alloc_poll_arrays(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t nb_poll,
+ uint32_t nb_wrr,
+ struct eth_rx_poll_entry **rx_poll,
+ uint32_t **wrr_sched)
+{
+
+ if (nb_poll == 0) {
+ *rx_poll = NULL;
+ *wrr_sched = NULL;
+ return 0;
+ }
+
+ *rx_poll = rxa_alloc_poll(rx_adapter, nb_poll);
+ if (*rx_poll == NULL) {
+ *wrr_sched = NULL;
+ return -ENOMEM;
+ }
+
+ *wrr_sched = rxa_alloc_wrr(rx_adapter, nb_wrr);
+ if (*wrr_sched == NULL) {
+ rte_free(*rx_poll);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static void
+rxa_calc_wrr_sequence(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_rx_poll_entry *rx_poll,
+ uint32_t *rx_wrr)
{
uint16_t d;
uint16_t q;
unsigned int i;
+ int prev = -1;
+ int cw = -1;
/* Initialize variables for calculation of wrr schedule */
uint16_t max_wrr_pos = 0;
@@ -200,79 +566,52 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
uint16_t max_wt = 0;
uint16_t gcd = 0;
- struct eth_rx_poll_entry *rx_poll = NULL;
- uint32_t *rx_wrr = NULL;
+ if (rx_poll == NULL)
+ return;
- if (rx_adapter->num_rx_polled) {
- size_t len = RTE_ALIGN(rx_adapter->num_rx_polled *
- sizeof(*rx_adapter->eth_rx_poll),
- RTE_CACHE_LINE_SIZE);
- rx_poll = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_poll == NULL)
- return -ENOMEM;
+ /* Generate array of all queues to poll, the size of this
+ * array is poll_q
+ */
+ RTE_ETH_FOREACH_DEV(d) {
+ uint16_t nb_rx_queues;
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[d];
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ if (dev_info->rx_queue == NULL)
+ continue;
+ if (dev_info->internal_event_port)
+ continue;
+ dev_info->wrr_len = 0;
+ for (q = 0; q < nb_rx_queues; q++) {
+ struct eth_rx_queue_info *queue_info =
+ &dev_info->rx_queue[q];
+ uint16_t wt;
- /* Generate array of all queues to poll, the size of this
- * array is poll_q
- */
- RTE_ETH_FOREACH_DEV(d) {
- uint16_t nb_rx_queues;
- struct eth_device_info *dev_info =
- &rx_adapter->eth_devices[d];
- nb_rx_queues = dev_info->dev->data->nb_rx_queues;
- if (dev_info->rx_queue == NULL)
+ if (!rxa_polled_queue(dev_info, q))
continue;
- for (q = 0; q < nb_rx_queues; q++) {
- struct eth_rx_queue_info *queue_info =
- &dev_info->rx_queue[q];
- if (queue_info->queue_enabled == 0)
- continue;
-
- uint16_t wt = queue_info->wt;
- rx_poll[poll_q].eth_dev_id = d;
- rx_poll[poll_q].eth_rx_qid = q;
- max_wrr_pos += wt;
- max_wt = RTE_MAX(max_wt, wt);
- gcd = (gcd) ? gcd_u16(gcd, wt) : wt;
- poll_q++;
- }
- }
-
- len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr),
- RTE_CACHE_LINE_SIZE);
- rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_wrr == NULL) {
- rte_free(rx_poll);
- return -ENOMEM;
- }
-
- /* Generate polling sequence based on weights */
- int prev = -1;
- int cw = -1;
- for (i = 0; i < max_wrr_pos; i++) {
- rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw,
- rx_poll, max_wt, gcd, prev);
- prev = rx_wrr[i];
+ wt = queue_info->wt;
+ rx_poll[poll_q].eth_dev_id = d;
+ rx_poll[poll_q].eth_rx_qid = q;
+ max_wrr_pos += wt;
+ dev_info->wrr_len += wt;
+ max_wt = RTE_MAX(max_wt, wt);
+ gcd = (gcd) ? rxa_gcd_u16(gcd, wt) : wt;
+ poll_q++;
}
}
- rte_free(rx_adapter->eth_rx_poll);
- rte_free(rx_adapter->wrr_sched);
-
- rx_adapter->eth_rx_poll = rx_poll;
- rx_adapter->wrr_sched = rx_wrr;
- rx_adapter->wrr_len = max_wrr_pos;
-
- return 0;
+ /* Generate polling sequence based on weights */
+ prev = -1;
+ cw = -1;
+ for (i = 0; i < max_wrr_pos; i++) {
+ rx_wrr[i] = rxa_wrr_next(rx_adapter, poll_q, &cw,
+ rx_poll, max_wt, gcd, prev);
+ prev = rx_wrr[i];
+ }
}
static inline void
-mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
+rxa_mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
struct ipv6_hdr **ipv6_hdr)
{
struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -311,7 +650,7 @@ mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
/* Calculate RSS hash for IPv4/6 */
static inline uint32_t
-do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
+rxa_do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
{
uint32_t input_len;
void *tuple;
@@ -320,7 +659,7 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
- mtoip(m, &ipv4_hdr, &ipv6_hdr);
+ rxa_mtoip(m, &ipv4_hdr, &ipv6_hdr);
if (ipv4_hdr) {
ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr);
@@ -339,13 +678,13 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
}
static inline int
-rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
{
return !!rx_adapter->enq_block_count;
}
static inline void
-rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
{
if (rx_adapter->rx_enq_block_start_ts)
return;
@@ -358,13 +697,13 @@ rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
struct rte_event_eth_rx_adapter_stats *stats)
{
if (unlikely(!stats->rx_enq_start_ts))
stats->rx_enq_start_ts = rte_get_tsc_cycles();
- if (likely(!rx_enq_blocked(rx_adapter)))
+ if (likely(!rxa_enq_blocked(rx_adapter)))
return;
rx_adapter->enq_block_count = 0;
@@ -380,8 +719,8 @@ rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
* this function
*/
static inline void
-buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
- struct rte_event *ev)
+rxa_buffer_event(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct rte_event *ev)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -390,7 +729,7 @@ buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
/* Enqueue buffered events to event device */
static inline uint16_t
-flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -407,8 +746,8 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
stats->rx_enq_retry++;
}
- n ? rx_enq_block_end_ts(rx_adapter, stats) :
- rx_enq_block_start_ts(rx_adapter);
+ n ? rxa_enq_block_end_ts(rx_adapter, stats) :
+ rxa_enq_block_start_ts(rx_adapter);
buf->count -= n;
stats->rx_enq_count += n;
@@ -417,18 +756,19 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
- uint8_t dev_id,
- uint16_t rx_queue_id,
- struct rte_mbuf **mbufs,
- uint16_t num)
+rxa_buffer_mbufs(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t eth_dev_id,
+ uint16_t rx_queue_id,
+ struct rte_mbuf **mbufs,
+ uint16_t num)
{
uint32_t i;
- struct eth_device_info *eth_device_info =
- &rx_adapter->eth_devices[dev_id];
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[eth_dev_id];
struct eth_rx_queue_info *eth_rx_queue_info =
- &eth_device_info->rx_queue[rx_queue_id];
-
+ &dev_info->rx_queue[rx_queue_id];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
int32_t qid = eth_rx_queue_info->event_queue_id;
uint8_t sched_type = eth_rx_queue_info->sched_type;
uint8_t priority = eth_rx_queue_info->priority;
@@ -439,6 +779,8 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
uint32_t rss;
int do_rss;
uint64_t ts;
+ struct rte_mbuf *cb_mbufs[BATCH_SIZE];
+ uint16_t nb_cb;
/* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */
rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1);
@@ -454,12 +796,26 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
+
+ nb_cb = dev_info->cb_fn ? dev_info->cb_fn(eth_dev_id, rx_queue_id,
+ ETH_EVENT_BUFFER_SIZE,
+ buf->count, mbufs,
+ num,
+ dev_info->cb_arg,
+ cb_mbufs) :
+ num;
+ if (nb_cb < num) {
+ mbufs = cb_mbufs;
+ num = nb_cb;
+ }
+
for (i = 0; i < num; i++) {
m = mbufs[i];
struct rte_event *ev = &events[i];
rss = do_rss ?
- do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss;
+ rxa_do_softrss(m, rx_adapter->rss_key_be) :
+ m->hash.rss;
flow_id =
eth_rx_queue_info->flow_id &
eth_rx_queue_info->flow_id_mask;
@@ -473,8 +829,275 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
ev->priority = priority;
ev->mbuf = m;
- buf_event_enqueue(rx_adapter, ev);
+ rxa_buffer_event(rx_adapter, ev);
+ }
+}
+
+/* Enqueue packets from <port, q> to event buffer */
+static inline uint32_t
+rxa_eth_rx(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t port_id,
+ uint16_t queue_id,
+ uint32_t rx_count,
+ uint32_t max_rx,
+ int *rxq_empty)
+{
+ struct rte_mbuf *mbufs[BATCH_SIZE];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
+ struct rte_event_eth_rx_adapter_stats *stats =
+ &rx_adapter->stats;
+ uint16_t n;
+ uint32_t nb_rx = 0;
+
+ if (rxq_empty)
+ *rxq_empty = 0;
+ /* Don't do a batch dequeue from the rx queue if there isn't
+ * enough space in the enqueue buffer.
+ */
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ stats->rx_poll_count++;
+ n = rte_eth_rx_burst(port_id, queue_id, mbufs, BATCH_SIZE);
+ if (unlikely(!n)) {
+ if (rxq_empty)
+ *rxq_empty = 1;
+ break;
+ }
+ rxa_buffer_mbufs(rx_adapter, port_id, queue_id, mbufs, n);
+ nb_rx += n;
+ if (rx_count + nb_rx > max_rx)
+ break;
}
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ return nb_rx;
+}
+
+static inline void
+rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
+ void *data)
+{
+ uint16_t port_id;
+ uint16_t queue;
+ int err;
+ union queue_data qd;
+ struct eth_device_info *dev_info;
+ struct eth_rx_queue_info *queue_info;
+ int *intr_enabled;
+
+ qd.ptr = data;
+ port_id = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port_id];
+ queue_info = &dev_info->rx_queue[queue];
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+ if (rxa_shared_intr(dev_info, queue))
+ intr_enabled = &dev_info->shared_intr_enabled;
+ else
+ intr_enabled = &queue_info->intr_enabled;
+
+ if (*intr_enabled) {
+ *intr_enabled = 0;
+ err = rte_ring_enqueue(rx_adapter->intr_ring, data);
+ /* Entry should always be available.
+ * The ring size equals the maximum number of interrupt
+ * vectors supported (an interrupt vector is shared in
+ * case of shared interrupts)
+ */
+ if (err)
+ RTE_EDEV_LOG_ERR("Failed to enqueue interrupt"
+ " to ring: %s", strerror(err));
+ else
+ rte_eth_dev_rx_intr_disable(port_id, queue);
+ }
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+static int
+rxa_intr_ring_check_avail(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_intr_vec)
+{
+ if (rx_adapter->num_intr_vec + num_intr_vec >
+ RTE_EVENT_ETH_INTR_RING_SIZE) {
+ RTE_EDEV_LOG_ERR("Exceeded intr ring slots current"
+ " %d needed %d limit %d", rx_adapter->num_intr_vec,
+ num_intr_vec, RTE_EVENT_ETH_INTR_RING_SIZE);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+/* Delete entries for (dev, queue) from the interrupt ring */
+static void
+rxa_intr_ring_del_entries(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int i, n;
+ union queue_data qd;
+
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+
+ n = rte_ring_count(rx_adapter->intr_ring);
+ for (i = 0; i < n; i++) {
+ rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (!rxa_shared_intr(dev_info, rx_queue_id)) {
+ if (qd.port == dev_info->dev->data->port_id &&
+ qd.queue == rx_queue_id)
+ continue;
+ } else {
+ if (qd.port == dev_info->dev->data->port_id)
+ continue;
+ }
+ rte_ring_enqueue(rx_adapter->intr_ring, qd.ptr);
+ }
+
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+/* pthread callback handling interrupt mode receive queues
+ * After receiving an Rx interrupt, it enqueues the port id and queue id of the
+ * interrupting queue to the adapter's ring buffer for interrupt events.
+ * These events are picked up by rxa_intr_ring_dequeue() which is invoked from
+ * the adapter service function.
+ */
+static void *
+rxa_intr_thread(void *arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter = arg;
+ struct rte_epoll_event *epoll_events = rx_adapter->epoll_events;
+ int n, i;
+
+ while (1) {
+ n = rte_epoll_wait(rx_adapter->epd, epoll_events,
+ RTE_EVENT_ETH_INTR_RING_SIZE, -1);
+ if (unlikely(n < 0))
+ RTE_EDEV_LOG_ERR("rte_epoll_wait returned error %d",
+ n);
+ for (i = 0; i < n; i++) {
+ rxa_intr_ring_enqueue(rx_adapter,
+ epoll_events[i].epdata.data);
+ }
+ }
+
+ return NULL;
+}
+
+/* Dequeue <port, q> from interrupt ring and enqueue received
+ * mbufs to eventdev
+ */
+static inline uint32_t
+rxa_intr_ring_dequeue(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ uint32_t n;
+ uint32_t nb_rx = 0;
+ int rxq_empty;
+ struct rte_eth_event_enqueue_buffer *buf;
+ rte_spinlock_t *ring_lock;
+ uint8_t max_done = 0;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ if (rte_ring_count(rx_adapter->intr_ring) == 0
+ && !rx_adapter->qd_valid)
+ return 0;
+
+ buf = &rx_adapter->event_enqueue_buffer;
+ ring_lock = &rx_adapter->intr_ring_lock;
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ struct eth_device_info *dev_info;
+ uint16_t port;
+ uint16_t queue;
+ union queue_data qd = rx_adapter->qd;
+ int err;
+
+ if (!rx_adapter->qd_valid) {
+ struct eth_rx_queue_info *queue_info;
+
+ rte_spinlock_lock(ring_lock);
+ err = rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (err) {
+ rte_spinlock_unlock(ring_lock);
+ break;
+ }
+
+ port = qd.port;
+ queue = qd.queue;
+ rx_adapter->qd = qd;
+ rx_adapter->qd_valid = 1;
+ dev_info = &rx_adapter->eth_devices[port];
+ if (rxa_shared_intr(dev_info, queue))
+ dev_info->shared_intr_enabled = 1;
+ else {
+ queue_info = &dev_info->rx_queue[queue];
+ queue_info->intr_enabled = 1;
+ }
+ rte_eth_dev_rx_intr_enable(port, queue);
+ rte_spinlock_unlock(ring_lock);
+ } else {
+ port = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port];
+ }
+
+ if (rxa_shared_intr(dev_info, queue)) {
+ uint16_t i;
+ uint16_t nb_queues;
+
+ nb_queues = dev_info->dev->data->nb_rx_queues;
+ n = 0;
+ for (i = dev_info->next_q_idx; i < nb_queues; i++) {
+ uint8_t enq_buffer_full;
+
+ if (!rxa_intr_queue(dev_info, i))
+ continue;
+ n = rxa_eth_rx(rx_adapter, port, i, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ nb_rx += n;
+
+ enq_buffer_full = !rxq_empty && n == 0;
+ max_done = nb_rx > rx_adapter->max_nb_rx;
+
+ if (enq_buffer_full || max_done) {
+ dev_info->next_q_idx = i;
+ goto done;
+ }
+ }
+
+ rx_adapter->qd_valid = 0;
+
+ /* Reinitialize for next interrupt */
+ dev_info->next_q_idx = dev_info->multi_intr_cap ?
+ RTE_MAX_RXTX_INTR_VEC_ID - 1 :
+ 0;
+ } else {
+ n = rxa_eth_rx(rx_adapter, port, queue, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ rx_adapter->qd_valid = !rxq_empty;
+ nb_rx += n;
+ if (nb_rx > rx_adapter->max_nb_rx)
+ break;
+ }
+ }
+
+done:
+ rx_adapter->stats.rx_intr_packets += nb_rx;
+ return nb_rx;
}
/*
@@ -491,12 +1114,10 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
* it.
*/
static inline uint32_t
-eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter)
{
uint32_t num_queue;
- uint16_t n;
uint32_t nb_rx = 0;
- struct rte_mbuf *mbufs[BATCH_SIZE];
struct rte_eth_event_enqueue_buffer *buf;
uint32_t wrr_pos;
uint32_t max_nb_rx;
@@ -504,7 +1125,7 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
wrr_pos = rx_adapter->wrr_pos;
max_nb_rx = rx_adapter->max_nb_rx;
buf = &rx_adapter->event_enqueue_buffer;
- struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;
+ stats = &rx_adapter->stats;
/* Iterate through a WRR sequence */
for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
@@ -516,45 +1137,42 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
* enough space in the enqueue buffer.
*/
if (buf->count >= BATCH_SIZE)
- flush_event_buffer(rx_adapter);
- if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
- break;
-
- stats->rx_poll_count++;
- n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
+ rxa_flush_event_buffer(rx_adapter);
+ if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) {
+ rx_adapter->wrr_pos = wrr_pos;
+ return nb_rx;
+ }
- if (n) {
- stats->rx_packets += n;
- /* The check before rte_eth_rx_burst() ensures that
- * all n mbufs can be buffered
- */
- fill_event_buffer(rx_adapter, d, qid, mbufs, n);
- nb_rx += n;
- if (nb_rx > max_nb_rx) {
- rx_adapter->wrr_pos =
+ nb_rx += rxa_eth_rx(rx_adapter, d, qid, nb_rx, max_nb_rx,
+ NULL);
+ if (nb_rx > max_nb_rx) {
+ rx_adapter->wrr_pos =
(wrr_pos + 1) % rx_adapter->wrr_len;
- return nb_rx;
- }
+ break;
}
if (++wrr_pos == rx_adapter->wrr_len)
wrr_pos = 0;
}
-
return nb_rx;
}
static int
-event_eth_rx_adapter_service_func(void *args)
+rxa_service_func(void *args)
{
struct rte_event_eth_rx_adapter *rx_adapter = args;
- struct rte_eth_event_enqueue_buffer *buf;
+ struct rte_event_eth_rx_adapter_stats *stats;
- buf = &rx_adapter->event_enqueue_buffer;
if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
return 0;
- if (eth_rx_poll(rx_adapter) == 0 && buf->count)
- flush_event_buffer(rx_adapter);
+ if (!rx_adapter->rxa_started) {
+		rte_spinlock_unlock(&rx_adapter->rx_lock);
+		return 0;
+ }
+
+ stats = &rx_adapter->stats;
+ stats->rx_packets += rxa_intr_ring_dequeue(rx_adapter);
+ stats->rx_packets += rxa_poll(rx_adapter);
rte_spinlock_unlock(&rx_adapter->rx_lock);
return 0;
}
@@ -586,14 +1204,14 @@ rte_event_eth_rx_adapter_init(void)
}
static inline struct rte_event_eth_rx_adapter *
-id_to_rx_adapter(uint8_t id)
+rxa_id_to_adapter(uint8_t id)
{
return event_eth_rx_adapter ?
event_eth_rx_adapter[id] : NULL;
}
static int
-default_conf_cb(uint8_t id, uint8_t dev_id,
+rxa_default_conf_cb(uint8_t id, uint8_t dev_id,
struct rte_event_eth_rx_adapter_conf *conf, void *arg)
{
int ret;
@@ -602,7 +1220,7 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
int started;
uint8_t port_id;
struct rte_event_port_conf *port_conf = arg;
- struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id);
+ struct rte_event_eth_rx_adapter *rx_adapter = rxa_id_to_adapter(id);
dev = &rte_eventdevs[rx_adapter->eventdev_id];
dev_conf = dev->data->dev_conf;
@@ -639,7 +1257,351 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
}
static int
-init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
+rxa_epoll_create1(void)
+{
+#if defined(LINUX)
+ int fd;
+ fd = epoll_create1(EPOLL_CLOEXEC);
+ return fd < 0 ? -errno : fd;
+#elif defined(BSD)
+ return -ENOTSUP;
+#endif
+}
+
+static int
+rxa_init_epd(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ if (rx_adapter->epd != INIT_FD)
+ return 0;
+
+ rx_adapter->epd = rxa_epoll_create1();
+ if (rx_adapter->epd < 0) {
+ int err = rx_adapter->epd;
+ rx_adapter->epd = INIT_FD;
+ RTE_EDEV_LOG_ERR("epoll_create1() failed, err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+rxa_create_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (rx_adapter->intr_ring)
+ return 0;
+
+ rx_adapter->intr_ring = rte_ring_create("intr_ring",
+ RTE_EVENT_ETH_INTR_RING_SIZE,
+ rte_socket_id(), 0);
+ if (!rx_adapter->intr_ring)
+ return -ENOMEM;
+
+ rx_adapter->epoll_events = rte_zmalloc_socket(rx_adapter->mem_name,
+ RTE_EVENT_ETH_INTR_RING_SIZE *
+ sizeof(struct rte_epoll_event),
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+ if (!rx_adapter->epoll_events) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ rte_spinlock_init(&rx_adapter->intr_ring_lock);
+
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "rx-intr-thread-%d", rx_adapter->id);
+
+ err = rte_ctrl_thread_create(&rx_adapter->rx_intr_thread, thread_name,
+ NULL, rxa_intr_thread, rx_adapter);
+ if (!err) {
+ rte_thread_setname(rx_adapter->rx_intr_thread, thread_name);
+ return 0;
+ }
+
+ RTE_EDEV_LOG_ERR("Failed to create interrupt thread err = %d\n", err);
+error:
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return err;
+}
+
+static int
+rxa_destroy_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+
+ err = pthread_cancel(rx_adapter->rx_intr_thread);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't cancel interrupt thread err = %d\n",
+ err);
+
+ err = pthread_join(rx_adapter->rx_intr_thread, NULL);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't join interrupt thread err = %d\n", err);
+
+ rte_free(rx_adapter->epoll_events);
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return 0;
+}
+
+static int
+rxa_free_intr_resources(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int ret;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ ret = rxa_destroy_intr_thread(rx_adapter);
+ if (ret)
+ return ret;
+
+ close(rx_adapter->epd);
+ rx_adapter->epd = INIT_FD;
+
+ return ret;
+}
+
+static int
+rxa_disable_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for Rx queue %u",
+ rx_queue_id);
+ return err;
+ }
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err)
+ RTE_EDEV_LOG_ERR("Interrupt event deletion failed %d", err);
+
+	if (sintr)
+		dev_info->shared_intr_enabled = 0;
+	else
+		dev_info->rx_queue[rx_queue_id].intr_enabled = 0;
+ return err;
+}
+
+static int
+rxa_del_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int err;
+ int i;
+ int s;
+
+ if (dev_info->nb_rx_intr == 0)
+ return 0;
+
+ err = 0;
+ if (rx_queue_id == -1) {
+ s = dev_info->nb_shared_intr;
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ int sintr;
+ uint16_t q;
+
+ q = dev_info->intr_queue[i];
+ sintr = rxa_shared_intr(dev_info, q);
+ s -= sintr;
+
+ if (!sintr || s == 0) {
+
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ q);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ q);
+ }
+ }
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+ if (!rxa_shared_intr(dev_info, rx_queue_id) ||
+ dev_info->nb_shared_intr == 1) {
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ rx_queue_id);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ rx_queue_id);
+ }
+
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ if (dev_info->intr_queue[i] == rx_queue_id) {
+ for (; i < dev_info->nb_rx_intr - 1; i++)
+ dev_info->intr_queue[i] =
+ dev_info->intr_queue[i + 1];
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+static int
+rxa_config_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err, err1;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ union queue_data qd;
+ int init_fd;
+ uint16_t *intr_queue;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ if (rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+
+ intr_queue = dev_info->intr_queue;
+ if (dev_info->intr_queue == NULL) {
+ size_t len =
+ dev_info->dev->data->nb_rx_queues * sizeof(uint16_t);
+ dev_info->intr_queue =
+ rte_zmalloc_socket(
+ rx_adapter->mem_name,
+ len,
+ 0,
+ rx_adapter->socket_id);
+ if (dev_info->intr_queue == NULL)
+ return -ENOMEM;
+ }
+
+ init_fd = rx_adapter->epd;
+ err = rxa_init_epd(rx_adapter);
+ if (err)
+ goto err_free_queue;
+
+ qd.port = eth_dev_id;
+ qd.queue = rx_queue_id;
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_ADD,
+ qd.ptr);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Failed to add interrupt event for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+ goto err_del_fd;
+ }
+
+ err = rte_eth_dev_rx_intr_enable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not enable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+
+ goto err_del_event;
+ }
+
+ err = rxa_create_intr_thread(rx_adapter);
+ if (!err) {
+ if (sintr)
+ dev_info->shared_intr_enabled = 1;
+ else
+ dev_info->rx_queue[rx_queue_id].intr_enabled = 1;
+ return 0;
+ }
+
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err)
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+err_del_event:
+ err1 = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err1) {
+ RTE_EDEV_LOG_ERR("Could not delete event for"
+ " Rx Queue %u err %d", rx_queue_id, err1);
+ }
+err_del_fd:
+ if (init_fd == INIT_FD) {
+ close(rx_adapter->epd);
+		rx_adapter->epd = INIT_FD;
+ }
+err_free_queue:
+ if (intr_queue == NULL)
+ rte_free(dev_info->intr_queue);
+
+ return err;
+}
+
+static int
+rxa_add_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+
+{
+ int i, j, err;
+ int si = -1;
+ int shared_done = (dev_info->nb_shared_intr > 0);
+
+ if (rx_queue_id != -1) {
+ if (rxa_shared_intr(dev_info, rx_queue_id) && shared_done)
+ return 0;
+ return rxa_config_intr(rx_adapter, dev_info, rx_queue_id);
+ }
+
+ err = 0;
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) {
+
+ if (rxa_shared_intr(dev_info, i) && shared_done)
+ continue;
+
+ err = rxa_config_intr(rx_adapter, dev_info, i);
+
+ shared_done = err == 0 && rxa_shared_intr(dev_info, i);
+ if (shared_done) {
+ si = i;
+ dev_info->shared_intr_enabled = 1;
+ }
+ if (err)
+ break;
+ }
+
+ if (err == 0)
+ return 0;
+
+ shared_done = (dev_info->nb_shared_intr > 0);
+ for (j = 0; j < i; j++) {
+ if (rxa_intr_queue(dev_info, j))
+ continue;
+ if (rxa_shared_intr(dev_info, j) && si != j)
+ continue;
+ err = rxa_disable_intr(rx_adapter, dev_info, j);
+ if (err)
+ break;
+
+ }
+
+ return err;
+}
+
+
+static int
+rxa_init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
{
int ret;
struct rte_service_spec service;
@@ -652,7 +1614,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN,
"rte_event_eth_rx_adapter_%d", id);
service.socket_id = rx_adapter->socket_id;
- service.callback = event_eth_rx_adapter_service_func;
+ service.callback = rxa_service_func;
service.callback_userdata = rx_adapter;
/* Service function handles locking for queue add/del updates */
service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
@@ -673,6 +1635,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
rx_adapter->event_port_id = rx_adapter_conf.event_port_id;
rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx;
rx_adapter->service_inited = 1;
+ rx_adapter->epd = INIT_FD;
return 0;
err_done:
@@ -680,9 +1643,8 @@ err_done:
return ret;
}
-
static void
-update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_update_queue(struct rte_event_eth_rx_adapter *rx_adapter,
struct eth_device_info *dev_info,
int32_t rx_queue_id,
uint8_t add)
@@ -696,7 +1658,7 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
if (rx_queue_id == -1) {
for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- update_queue_info(rx_adapter, dev_info, i, add);
+ rxa_update_queue(rx_adapter, dev_info, i, add);
} else {
queue_info = &dev_info->rx_queue[rx_queue_id];
enabled = queue_info->queue_enabled;
@@ -711,31 +1673,65 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
-static int
-event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id)
+static void
+rxa_sw_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id)
{
- struct eth_rx_queue_info *queue_info;
+ int pollq;
+ int intrq;
+ int sintrq;
+
if (rx_adapter->nb_queues == 0)
- return 0;
+ return;
- queue_info = &dev_info->rx_queue[rx_queue_id];
- rx_adapter->num_rx_polled -= queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 0);
- return 0;
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_sw_del(rx_adapter, dev_info, i);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 0);
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
}
static void
-event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id,
- const struct rte_event_eth_rx_adapter_queue_conf *conf)
-
+rxa_add_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *conf)
{
struct eth_rx_queue_info *queue_info;
const struct rte_event *ev = &conf->ev;
+ int pollq;
+ int intrq;
+ int sintrq;
+
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_add_queue(rx_adapter, dev_info, i, conf);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
queue_info = &dev_info->rx_queue[rx_queue_id];
queue_info->event_queue_id = ev->queue_id;
@@ -749,69 +1745,162 @@ event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
queue_info->flow_id_mask = ~0;
}
- /* The same queue can be added more than once */
- rx_adapter->num_rx_polled += !queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 1);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 1);
+ if (rxa_polled_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled += !pollq;
+ dev_info->nb_rx_poll += !pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
+ }
+
+ if (rxa_intr_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr += !intrq;
+ dev_info->nb_rx_intr += !intrq;
+ dev_info->nb_shared_intr += !intrq && sintrq;
+ if (dev_info->nb_shared_intr == 1) {
+ if (dev_info->multi_intr_cap)
+ dev_info->next_q_idx =
+ RTE_MAX_RXTX_INTR_VEC_ID - 1;
+ else
+ dev_info->next_q_idx = 0;
+ }
+ }
}
-static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+static int rxa_sw_add(struct rte_event_eth_rx_adapter *rx_adapter,
uint16_t eth_dev_id,
int rx_queue_id,
const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
{
struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id];
struct rte_event_eth_rx_adapter_queue_conf temp_conf;
- uint32_t i;
int ret;
+ struct eth_rx_poll_entry *rx_poll;
+ struct eth_rx_queue_info *rx_queue;
+ uint32_t *rx_wrr;
+ uint16_t nb_rx_queues;
+ uint32_t nb_rx_poll, nb_wrr;
+ uint32_t nb_rx_intr;
+ int num_intr_vec;
+ uint16_t wt;
if (queue_conf->servicing_weight == 0) {
-
struct rte_eth_dev_data *data = dev_info->dev->data;
- if (data->dev_conf.intr_conf.rxq) {
- RTE_EDEV_LOG_ERR("Interrupt driven queues"
- " not supported");
- return -ENOTSUP;
- }
- temp_conf = *queue_conf;
- /* If Rx interrupts are disabled set wt = 1 */
- temp_conf.servicing_weight = 1;
+ temp_conf = *queue_conf;
+ if (!data->dev_conf.intr_conf.rxq) {
+ /* If Rx interrupts are disabled, set wt = 1 */
+ temp_conf.servicing_weight = 1;
+ }
queue_conf = &temp_conf;
}
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ rx_queue = dev_info->rx_queue;
+ wt = queue_conf->servicing_weight;
+
if (dev_info->rx_queue == NULL) {
dev_info->rx_queue =
rte_zmalloc_socket(rx_adapter->mem_name,
- dev_info->dev->data->nb_rx_queues *
+ nb_rx_queues *
sizeof(struct eth_rx_queue_info), 0,
rx_adapter->socket_id);
if (dev_info->rx_queue == NULL)
return -ENOMEM;
}
+ rx_wrr = NULL;
+ rx_poll = NULL;
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_add(rx_adapter,
- dev_info, i,
- queue_conf);
+ rxa_calc_nb_post_add(rx_adapter, dev_info, rx_queue_id,
+ queue_conf->servicing_weight,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ if (dev_info->dev->intr_handle)
+ dev_info->multi_intr_cap =
+ rte_intr_cap_multiple(dev_info->dev->intr_handle);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ goto err_free_rxqueue;
+
+ if (wt == 0) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info, rx_queue_id, 1);
+
+ ret = rxa_intr_ring_check_avail(rx_adapter, num_intr_vec);
+ if (ret)
+ goto err_free_rxqueue;
+
+ ret = rxa_add_intr_queue(rx_adapter, dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
} else {
- event_eth_rx_adapter_queue_add(rx_adapter, dev_info,
- (uint16_t)rx_queue_id,
- queue_conf);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ /* Interrupt-based queues are being converted to
+ * poll-mode queues; delete the interrupt configuration
+ * for those queues.
+ */
+ ret = rxa_del_intr_queue(rx_adapter,
+ dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
+ }
}
- ret = eth_poll_wrr_calc(rx_adapter);
- if (ret) {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info, rx_queue_id);
- return ret;
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto err_free_rxqueue;
}
- return ret;
+ if (wt == 0) {
+ uint16_t i;
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ dev_info->intr_queue[i] = i;
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ dev_info->intr_queue[nb_rx_intr - 1] =
+ rx_queue_id;
+ }
+ }
+
+ rxa_add_queue(rx_adapter, dev_info, rx_queue_id, queue_conf);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
+ return 0;
+
+err_free_rxqueue:
+ if (rx_queue == NULL) {
+ rte_free(dev_info->rx_queue);
+ dev_info->rx_queue = NULL;
+ }
+
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+
+ return ret;
}
static int
-rx_adapter_ctrl(uint8_t id, int start)
+rxa_ctrl(uint8_t id, int start)
{
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
@@ -821,7 +1910,7 @@ rx_adapter_ctrl(uint8_t id, int start)
int stop = !start;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -845,8 +1934,12 @@ rx_adapter_ctrl(uint8_t id, int start)
&rte_eth_devices[i]);
}
- if (use_service)
+ if (use_service) {
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ rx_adapter->rxa_started = start;
rte_service_runstate_set(rx_adapter->service_id, start);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ }
return 0;
}
@@ -880,7 +1973,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
return ret;
}
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter != NULL) {
RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id);
return -EEXIST;
@@ -902,6 +1995,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
rx_adapter->socket_id = socket_id;
rx_adapter->conf_cb = conf_cb;
rx_adapter->conf_arg = conf_arg;
+ rx_adapter->id = id;
strcpy(rx_adapter->mem_name, mem_name);
rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name,
/* FIXME: incompatible with hotplug */
@@ -922,7 +2016,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
rx_adapter->eth_devices[i].dev = &rte_eth_devices[i];
event_eth_rx_adapter[id] = rx_adapter;
- if (conf_cb == default_conf_cb)
+ if (conf_cb == rxa_default_conf_cb)
rx_adapter->default_cb_arg = 1;
return 0;
}
@@ -943,7 +2037,7 @@ rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id,
return -ENOMEM;
*pc = *port_config;
ret = rte_event_eth_rx_adapter_create_ext(id, dev_id,
- default_conf_cb,
+ rxa_default_conf_cb,
pc);
if (ret)
rte_free(pc);
@@ -957,7 +2051,7 @@ rte_event_eth_rx_adapter_free(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -987,12 +2081,11 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
struct eth_device_info *dev_info;
- int start_service;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if ((rx_adapter == NULL) || (queue_conf == NULL))
return -EINVAL;
@@ -1030,7 +2123,6 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
return -EINVAL;
}
- start_service = 0;
dev_info = &rx_adapter->eth_devices[eth_dev_id];
if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
@@ -1050,28 +2142,29 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&rte_eth_devices[eth_dev_id],
rx_queue_id, queue_conf);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ dev_info->internal_event_port = 1;
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
1);
}
} else {
rte_spinlock_lock(&rx_adapter->rx_lock);
- ret = init_service(rx_adapter, id);
- if (ret == 0)
- ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id,
+ dev_info->internal_event_port = 0;
+ ret = rxa_init_service(rx_adapter, id);
+ if (ret == 0) {
+ uint32_t service_id = rx_adapter->service_id;
+ ret = rxa_sw_add(rx_adapter, eth_dev_id, rx_queue_id,
queue_conf);
+ rte_service_component_runstate_set(service_id,
+ rxa_sw_adapter_queue_count(rx_adapter));
+ }
rte_spinlock_unlock(&rx_adapter->rx_lock);
- if (ret == 0)
- start_service = !!sw_rx_adapter_queue_count(rx_adapter);
}
if (ret)
return ret;
- if (start_service)
- rte_service_component_runstate_set(rx_adapter->service_id, 1);
-
return 0;
}
@@ -1084,12 +2177,17 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct eth_device_info *dev_info;
uint32_t cap;
- uint16_t i;
+ uint32_t nb_rx_poll = 0;
+ uint32_t nb_wrr = 0;
+ uint32_t nb_rx_intr;
+ struct eth_rx_poll_entry *rx_poll = NULL;
+ uint32_t *rx_wrr = NULL;
+ int num_intr_vec;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -1116,7 +2214,7 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
&rte_eth_devices[eth_dev_id],
rx_queue_id);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
0);
@@ -1126,48 +2224,78 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
}
}
} else {
- int rc;
+ rxa_calc_nb_post_del(rx_adapter, dev_info, rx_queue_id,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ return ret;
+
rte_spinlock_lock(&rx_adapter->rx_lock);
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- i);
- } else {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- (uint16_t)rx_queue_id);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ ret = rxa_del_intr_queue(rx_adapter, dev_info,
+ rx_queue_id);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ rxa_sw_del(rx_adapter, dev_info, rx_queue_id);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ if (nb_rx_intr == 0) {
+ rte_free(dev_info->intr_queue);
+ dev_info->intr_queue = NULL;
}
- rc = eth_poll_wrr_calc(rx_adapter);
- if (rc)
- RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32,
- rc);
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
if (dev_info->nb_dev_queues == 0) {
rte_free(dev_info->rx_queue);
dev_info->rx_queue = NULL;
}
-
+unlock_ret:
rte_spinlock_unlock(&rx_adapter->rx_lock);
+ if (ret) {
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+ return ret;
+ }
+
rte_service_component_runstate_set(rx_adapter->service_id,
- sw_rx_adapter_queue_count(rx_adapter));
+ rxa_sw_adapter_queue_count(rx_adapter));
}
return ret;
}
-
int
rte_event_eth_rx_adapter_start(uint8_t id)
{
- return rx_adapter_ctrl(id, 1);
+ return rxa_ctrl(id, 1);
}
int
rte_event_eth_rx_adapter_stop(uint8_t id)
{
- return rx_adapter_ctrl(id, 0);
+ return rxa_ctrl(id, 0);
}
int
@@ -1184,7 +2312,7 @@ rte_event_eth_rx_adapter_stats_get(uint8_t id,
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || stats == NULL)
return -EINVAL;
@@ -1222,7 +2350,7 @@ rte_event_eth_rx_adapter_stats_reset(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -1247,7 +2375,7 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || service_id == NULL)
return -EINVAL;
@@ -1256,3 +2384,47 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
return rx_adapter->service_inited ? 0 : -ESRCH;
}
+
+int rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct eth_device_info *dev_info;
+ uint32_t cap;
+ int ret;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+
+ rx_adapter = rxa_id_to_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev_info = &rx_adapter->eth_devices[eth_dev_id];
+ if (dev_info->rx_queue == NULL)
+ return -EINVAL;
+
+ ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id,
+ eth_dev_id,
+ &cap);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8
+ "eth port %" PRIu16, id, eth_dev_id);
+ return ret;
+ }
+
+ if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
+ RTE_EDEV_LOG_ERR("Rx callback not supported for eth port %"
+ PRIu16, eth_dev_id);
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ dev_info->cb_fn = cb_fn;
+ dev_info->cb_arg = cb_arg;
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+
+ return 0;
+}
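
For orientation, a minimal sketch of how an application drives the SW adapter path reworked above; the adapter/eventdev/port ids, the port_conf numbers, and the service lcore are placeholders, not values taken from this patch:

	#include <string.h>
	#include <rte_event_eth_rx_adapter.h>
	#include <rte_service.h>

	static int
	setup_rx_adapter(void)
	{
		struct rte_event_port_conf pconf = {
			.new_event_threshold = 4096,
			.dequeue_depth = 32,
			.enqueue_depth = 32,
		};
		struct rte_event_eth_rx_adapter_queue_conf qconf;
		uint32_t service_id;
		int ret;

		memset(&qconf, 0, sizeof(qconf));
		qconf.ev.queue_id = 0;
		qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
		qconf.servicing_weight = 1;	/* 0 would request interrupt mode */

		ret = rte_event_eth_rx_adapter_create(0, 0, &pconf);
		if (ret)
			return ret;
		/* rx_queue_id of -1 adds all Rx queues of the port */
		ret = rte_event_eth_rx_adapter_queue_add(0, 0, -1, &qconf);
		if (ret)
			return ret;
		/* SW transfers run as a service; lcore 1 is assumed to be a
		 * started service core.
		 */
		if (rte_event_eth_rx_adapter_service_id_get(0, &service_id) == 0)
			rte_service_map_lcore_set(service_id, 1, 1);
		return rte_event_eth_rx_adapter_start(0);
	}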
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
index 307b2b50..332ee216 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
@@ -63,9 +63,22 @@
* rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve
* the service function ID of the adapter in this case.
*
+ * For SW based packet transfers, i.e., when the
+ * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is not set in the adapter's
+ * capabilities flags for a particular Ethernet device, the service function
+ * temporarily enqueues mbufs to an event buffer before batch enqueueing these
+ * to the event device. If the buffer fills up, the service function stops
+ * dequeueing packets from the Ethernet device. The application may want to
+ * monitor the buffer fill level and instruct the service function to
+ * selectively buffer packets. The application may also use some other
+ * criteria to decide which packets should enter the event device even when
+ * the event buffer fill level is low. The
+ * rte_event_eth_rx_adapter_cb_register() function allows the
+ * application to register a callback that selects which packets to enqueue
+ * to the event device.
+ *
* Note:
- * 1) Interrupt driven receive queues are currently unimplemented.
- * 2) Devices created after an instance of rte_event_eth_rx_adapter_create
+ * 1) Devices created after an instance of rte_event_eth_rx_adapter_create
* should be added to a new instance of the rx adapter.
*/
@@ -199,12 +212,55 @@ struct rte_event_eth_rx_adapter_stats {
* block cycles can be used to compute the percentage of
* cycles the service is blocked by the event device.
*/
+ uint64_t rx_intr_packets;
+ /**< Received packet count for interrupt mode Rx queues */
};
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Callback function invoked by the SW adapter before it continues
+ * to process packets. The callback is passed the size of the enqueue
+ * buffer in the SW adapter and the occupancy of the buffer. The
+ * callback can use these values to decide which mbufs should be
+ * enqueued to the event device. If the return value of the callback
+ * is less than nb_mbuf, the SW adapter enqueues that many mbufs from
+ * enq_buf[] to the event device.
+ *
+ * @param eth_dev_id
+ * Port identifier of the Ethernet device.
+ * @param queue_id
+ * Receive queue index.
+ * @param enqueue_buf_size
+ * Total enqueue buffer size.
+ * @param enqueue_buf_count
+ * mbuf count in enqueue buffer.
+ * @param mbuf
+ * mbuf array.
+ * @param nb_mbuf
+ * mbuf count.
+ * @param cb_arg
+ * Callback argument.
+ * @param[out] enq_buf
+ * The adapter enqueues enq_buf[] if the return value of the
+ * callback is less than nb_mbuf.
+ * @return
+ * The number of mbufs that should be enqueued to the event device.
+ */
+typedef uint16_t (*rte_event_eth_rx_adapter_cb_fn)(uint16_t eth_dev_id,
+ uint16_t queue_id,
+ uint32_t enqueue_buf_size,
+ uint32_t enqueue_buf_count,
+ struct rte_mbuf **mbuf,
+ uint16_t nb_mbuf,
+ void *cb_arg,
+ struct rte_mbuf **enq_buf);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Create a new ethernet Rx event adapter with the specified identifier.
*
* @param id
@@ -425,6 +481,32 @@ int rte_event_eth_rx_adapter_stats_reset(uint8_t id);
*/
int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a callback to process Rx packets; this is supported only for
+ * SW based packet transfers.
+ * @see rte_event_eth_rx_adapter_cb_fn
+ *
+ * @param id
+ * Adapter identifier.
+ * @param eth_dev_id
+ * Port identifier of Ethernet device.
+ * @param cb_fn
+ * Callback function.
+ * @param cb_arg
+ * Callback argument.
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg);
+
#ifdef __cplusplus
}
#endif
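
A hedged sketch of a callback matching the typedef added above; the back-pressure policy and the assumption that the callback frees the mbufs it filters out are illustrative choices, not behaviour specified by this patch:

	#include <rte_common.h>
	#include <rte_mbuf.h>
	#include <rte_event_eth_rx_adapter.h>

	static uint16_t
	rx_adapter_cb(uint16_t eth_dev_id, uint16_t queue_id,
		      uint32_t enqueue_buf_size, uint32_t enqueue_buf_count,
		      struct rte_mbuf **mbuf, uint16_t nb_mbuf,
		      void *cb_arg, struct rte_mbuf **enq_buf)
	{
		uint32_t room = enqueue_buf_size - enqueue_buf_count;
		uint16_t i, n = nb_mbuf;

		RTE_SET_USED(eth_dev_id);
		RTE_SET_USED(queue_id);
		RTE_SET_USED(cb_arg);

		/* Pass through only as many mbufs as the buffer can take */
		if (room < n)
			n = (uint16_t)room;
		for (i = 0; i < n; i++)
			enq_buf[i] = mbuf[i];
		/* Assumption: the callback owns, and frees, what it drops */
		for (; i < nb_mbuf; i++)
			rte_pktmbuf_free(mbuf[i]);
		return n;
	}

	/* Registered per eth port, e.g.:
	 * rte_event_eth_rx_adapter_cb_register(id, eth_dev_id,
	 *					rx_adapter_cb, NULL);
	 */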
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
index eb67751d..16d02a95 100644
--- a/lib/librte_eventdev/rte_event_ring.c
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -82,11 +82,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id,
mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
if (mz != NULL) {
r = mz->addr;
- /*
- * no need to check return value here, we already checked the
- * arguments above
- */
- rte_event_ring_init(r, name, requested_count, flags);
+ /* Check return value in case rte_event_ring_init() fails on size */
+ int err = rte_event_ring_init(r, name, requested_count, flags);
+ if (err) {
+ RTE_LOG(ERR, RING, "Ring init failed\n");
+ if (rte_memzone_free(mz) != 0)
+ RTE_LOG(ERR, RING, "Cannot free memzone\n");
+ rte_free(te);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return NULL;
+ }
te->data = (void *) r;
r->r.memzone = mz;
diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c b/lib/librte_eventdev/rte_event_timer_adapter.c
index 6f1d672c..79070d48 100644
--- a/lib/librte_eventdev/rte_event_timer_adapter.c
+++ b/lib/librte_eventdev/rte_event_timer_adapter.c
@@ -1282,9 +1282,7 @@ static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops = {
.cancel_burst = sw_event_timer_cancel_burst,
};
-RTE_INIT(event_timer_adapter_init_log);
-static void
-event_timer_adapter_init_log(void)
+RTE_INIT(event_timer_adapter_init_log)
{
evtim_logtype = rte_log_register("lib.eventdev.adapter.timer");
if (evtim_logtype >= 0)
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index 7ca9fd14..801810ed 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -57,16 +57,21 @@ int
rte_event_dev_get_dev_id(const char *name)
{
int i;
+ uint8_t cmp;
if (!name)
return -EINVAL;
- for (i = 0; i < rte_eventdev_globals->nb_devs; i++)
- if ((strcmp(rte_event_devices[i].data->name, name)
- == 0) &&
- (rte_event_devices[i].attached ==
- RTE_EVENTDEV_ATTACHED))
+ for (i = 0; i < rte_eventdev_globals->nb_devs; i++) {
+ cmp = (strncmp(rte_event_devices[i].data->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) ||
+ (rte_event_devices[i].dev ? (strncmp(
+ rte_event_devices[i].dev->driver->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) : 0);
+ if (cmp && (rte_event_devices[i].attached ==
+ RTE_EVENTDEV_ATTACHED))
return i;
+ }
return -ENODEV;
}
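
After this change the lookup matches either the device name or, for attached devices, the underlying driver name, bounded by RTE_EVENTDEV_NAME_MAX_LEN; a hedged example (both names are hypothetical):

	int dev_id = rte_event_dev_get_dev_id("event_sw0");	/* device name */
	if (dev_id < 0)
		dev_id = rte_event_dev_get_dev_id("event_sw");	/* driver name */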
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index c3f18d6d..12835e9f 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -83,6 +83,19 @@ DPDK_18.05 {
EXPERIMENTAL {
global:
+ rte_event_crypto_adapter_caps_get;
+ rte_event_crypto_adapter_create;
+ rte_event_crypto_adapter_create_ext;
+ rte_event_crypto_adapter_event_port_get;
+ rte_event_crypto_adapter_free;
+ rte_event_crypto_adapter_queue_pair_add;
+ rte_event_crypto_adapter_queue_pair_del;
+ rte_event_crypto_adapter_service_id_get;
+ rte_event_crypto_adapter_start;
+ rte_event_crypto_adapter_stats_get;
+ rte_event_crypto_adapter_stats_reset;
+ rte_event_crypto_adapter_stop;
+ rte_event_eth_rx_adapter_cb_register;
rte_event_timer_adapter_caps_get;
rte_event_timer_adapter_create;
rte_event_timer_adapter_create_ext;
@@ -97,16 +110,4 @@ EXPERIMENTAL {
rte_event_timer_arm_burst;
rte_event_timer_arm_tmo_tick_burst;
rte_event_timer_cancel_burst;
- rte_event_crypto_adapter_caps_get;
- rte_event_crypto_adapter_create;
- rte_event_crypto_adapter_create_ext;
- rte_event_crypto_adapter_event_port_get;
- rte_event_crypto_adapter_free;
- rte_event_crypto_adapter_queue_pair_add;
- rte_event_crypto_adapter_queue_pair_del;
- rte_event_crypto_adapter_service_id_get;
- rte_event_crypto_adapter_start;
- rte_event_crypto_adapter_stats_get;
- rte_event_crypto_adapter_stats_reset;
- rte_event_crypto_adapter_stop;
};
diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c
index 591d98e2..4c3469da 100644
--- a/lib/librte_flow_classify/rte_flow_classify.c
+++ b/lib/librte_flow_classify/rte_flow_classify.c
@@ -673,10 +673,7 @@ rte_flow_classifier_query(struct rte_flow_classifier *cls,
return ret;
}
-RTE_INIT(librte_flow_classify_init_log);
-
-static void
-librte_flow_classify_init_log(void)
+RTE_INIT(librte_flow_classify_init_log)
{
librte_flow_classify_logtype =
rte_log_register("lib.flow_classify");
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 3648ec09..1fac53a8 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -19,6 +19,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 5ca59745..6cd764ff 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -31,6 +31,9 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_GRE))
+#define IS_IPV4_UDP(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4)) == \
+ (PKT_TX_UDP_SEG | PKT_TX_IPV4))
+
/**
* Internal function which updates the UDP header of a packet, following
* segmentation. This is required to update the header's datagram length field.
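
The new predicate mirrors the IS_IPV4_TCP/IS_IPV4_GRE checks defined alongside it; a minimal sketch of the flag combination it tests:

	static inline int
	pkt_wants_udp4_gso(const struct rte_mbuf *m)
	{
		/* True only when both PKT_TX_UDP_SEG and PKT_TX_IPV4 are set */
		return IS_IPV4_UDP(m->ol_flags);
	}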
diff --git a/lib/librte_gso/gso_udp4.c b/lib/librte_gso/gso_udp4.c
new file mode 100644
index 00000000..927dee12
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include "gso_common.h"
+#include "gso_udp4.h"
+
+#define IPV4_HDR_MF_BIT (1U << 13)
+
+static inline void
+update_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
+ uint16_t nb_segs)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t frag_offset = 0, is_mf;
+ uint16_t l2_hdrlen = pkt->l2_len, l3_hdrlen = pkt->l3_len;
+ uint16_t tail_idx = nb_segs - 1, length, i;
+
+ /*
+ * Update IP header fields for output segments. Specifically,
+ * keep the same IP id, update fragment offset and total
+ * length.
+ */
+ for (i = 0; i < nb_segs; i++) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(segs[i], struct ipv4_hdr *,
+ l2_hdrlen);
+ length = segs[i]->pkt_len - l2_hdrlen;
+ ipv4_hdr->total_length = rte_cpu_to_be_16(length);
+
+ is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
+ ipv4_hdr->fragment_offset =
+ rte_cpu_to_be_16(frag_offset | is_mf);
+ frag_offset += ((length - l3_hdrlen) >> 3);
+ }
+}
+
+int
+gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t pyld_unit_size, hdr_offset;
+ uint16_t frag_off;
+ int ret;
+
+ /* Don't process already-fragmented packets */
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ pkt->l2_len);
+ frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+ if (unlikely(IS_FRAGMENTED(frag_off))) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ /*
+ * UDP fragmentation is the same as IP fragmentation.
+ * Except for the first one, output packets carry only l2
+ * and l3 headers.
+ */
+ hdr_offset = pkt->l2_len + pkt->l3_len;
+
+ /* Don't process packets with no payload data. */
+ if (unlikely(hdr_offset + pkt->l4_len >= pkt->pkt_len)) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ pyld_unit_size = gso_size - hdr_offset;
+
+ /* Segment the payload */
+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
+ if (ret > 1)
+ update_ipv4_udp_headers(pkt, pkts_out, ret);
+
+ return ret;
+}
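
A worked example of the offset arithmetic above, with hypothetical sizes: for l2_len = 14 (Ethernet) and l3_len = 20 (IPv4), hdr_offset is 34, so gso_size = 1434 yields pyld_unit_size = 1400 bytes, i.e. 175 eight-byte units; intermediate fragments must carry a multiple of 8 payload bytes, which the caller's choice of gso_size is assumed to satisfy. The 8-byte UDP header travels inside the first fragment's payload, which is why later segments replicate only the l2/l3 headers, and update_ipv4_udp_headers() then writes fragment offsets 0, 175, 350, ... with the MF bit set on every segment except the last.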
diff --git a/lib/librte_gso/gso_udp4.h b/lib/librte_gso/gso_udp4.h
new file mode 100644
index 00000000..b2a2908e
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _GSO_UDP4_H_
+#define _GSO_UDP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a UDP/IPv4 packet. This function doesn't check if the input
+ * packet has correct checksums, and doesn't update checksums for output
+ * GSO segments. Furthermore, it doesn't process already-fragmented IP
+ * packets.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when the function succeeds. If the memory space in
+ * pkts_out is insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can hold.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if it runs out of memory in the MBUF pools.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build
index 056534fb..ad8dd858 100644
--- a/lib/librte_gso/meson.build
+++ b/lib/librte_gso/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-sources = files('gso_common.c', 'gso_tcp4.c',
+sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
'gso_tunnel_tcp4.c', 'rte_gso.c')
headers = files('rte_gso.h')
deps += ['ethdev']
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index a44e3d43..751b5b62 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -11,6 +11,17 @@
#include "gso_common.h"
#include "gso_tcp4.h"
#include "gso_tunnel_tcp4.h"
+#include "gso_udp4.h"
+
+#define ILLEGAL_UDP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & DEV_TX_OFFLOAD_UDP_TSO) == 0) || \
+ (ctx)->gso_size < RTE_GSO_UDP_SEG_SIZE_MIN)
+
+#define ILLEGAL_TCP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | \
+ DEV_TX_OFFLOAD_VXLAN_TNL_TSO | \
+ DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0) || \
+ (ctx)->gso_size < RTE_GSO_SEG_SIZE_MIN)
int
rte_gso_segment(struct rte_mbuf *pkt,
@@ -27,14 +38,12 @@ rte_gso_segment(struct rte_mbuf *pkt,
if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
nb_pkts_out < 1 ||
- gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN ||
- ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO |
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
- DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0))
+ (ILLEGAL_UDP_GSO_CTX(gso_ctx) &&
+ ILLEGAL_TCP_GSO_CTX(gso_ctx)))
return -EINVAL;
if (gso_ctx->gso_size >= pkt->pkt_len) {
- pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ pkt->ol_flags &= (~(PKT_TX_TCP_SEG | PKT_TX_UDP_SEG));
pkts_out[0] = pkt;
return 1;
}
@@ -59,6 +68,11 @@ rte_gso_segment(struct rte_mbuf *pkt,
ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
direct_pool, indirect_pool,
pkts_out, nb_pkts_out);
+ } else if (IS_IPV4_UDP(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_UDP_TSO)) {
+ pkt->ol_flags &= (~PKT_TX_UDP_SEG);
+ ret = gso_udp4_segment(pkt, gso_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
} else {
/* unsupported packet, skip */
pkts_out[0] = pkt;
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
index f4abd61c..a626a11e 100644
--- a/lib/librte_gso/rte_gso.h
+++ b/lib/librte_gso/rte_gso.h
@@ -17,10 +17,14 @@ extern "C" {
#include <stdint.h>
#include <rte_mbuf.h>
-/* Minimum GSO segment size. */
+/* Minimum GSO segment size for TCP based packets. */
#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1)
+/* Minimum GSO segment size for UDP based packets. */
+#define RTE_GSO_UDP_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
+ sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr) + 1)
+
/* GSO flags for rte_gso_ctx. */
#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
/**< Use fixed IP ids for output GSO segments. Setting
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index e139e1d7..efc06ede 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -6,7 +6,6 @@ headers = files('rte_cmp_arm64.h',
'rte_cmp_x86.h',
'rte_crc_arm64.h',
'rte_cuckoo_hash.h',
- 'rte_cuckoo_hash_x86.h',
'rte_fbk_hash.h',
'rte_hash_crc.h',
'rte_hash.h',
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index a07543a2..f7b86c8c 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -31,9 +31,6 @@
#include "rte_hash.h"
#include "rte_cuckoo_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cuckoo_hash_x86.h"
-#endif
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -93,8 +90,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
void *buckets = NULL;
char ring_name[RTE_RING_NAMESIZE];
unsigned num_key_slots;
- unsigned hw_trans_mem_support = 0;
unsigned i;
+ unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
+ unsigned int readwrite_concur_support = 0;
+
rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
@@ -107,7 +106,6 @@ rte_hash_create(const struct rte_hash_parameters *params)
/* Check for valid parameters */
if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
- !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) ||
(params->key_len == 0)) {
rte_errno = EINVAL;
RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
@@ -118,21 +116,29 @@ rte_hash_create(const struct rte_hash_parameters *params)
if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
hw_trans_mem_support = 1;
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
+ multi_writer_support = 1;
+
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
+ readwrite_concur_support = 1;
+ multi_writer_support = 1;
+ }
+
/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
- if (hw_trans_mem_support)
+ if (multi_writer_support)
/*
* Increase number of slots by total number of indices
* that can be stored in the lcore caches
* except for the first cache
*/
num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
- LCORE_CACHE_SIZE + 1;
+ (LCORE_CACHE_SIZE - 1) + 1;
else
num_key_slots = params->entries + 1;
snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
/* Create ring (Dummy slot index is not enqueued) */
- r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
+ r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
params->socket_id, 0);
if (r == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -233,7 +239,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->cmp_jump_table_idx = KEY_OTHER_BYTES;
#endif
- if (hw_trans_mem_support) {
+ if (multi_writer_support) {
h->local_free_slots = rte_zmalloc_socket(NULL,
sizeof(struct lcore_cache) * RTE_MAX_LCORE,
RTE_CACHE_LINE_SIZE, params->socket_id);
@@ -261,6 +267,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->key_store = k;
h->free_slots = r;
h->hw_trans_mem_support = hw_trans_mem_support;
+ h->multi_writer_support = multi_writer_support;
+ h->readwrite_concur_support = readwrite_concur_support;
#if defined(RTE_ARCH_X86)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -271,24 +279,20 @@ rte_hash_create(const struct rte_hash_parameters *params)
#endif
h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
- /* Turn on multi-writer only with explicit flat from user and TM
+ /* Turn on multi-writer only with explicit flag from user and TM
* support.
*/
- if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
- if (h->hw_trans_mem_support) {
- h->add_key = ADD_KEY_MULTIWRITER_TM;
- } else {
- h->add_key = ADD_KEY_MULTIWRITER;
- h->multiwriter_lock = rte_malloc(NULL,
- sizeof(rte_spinlock_t),
- LCORE_CACHE_SIZE);
- rte_spinlock_init(h->multiwriter_lock);
- }
- } else
- h->add_key = ADD_KEY_SINGLEWRITER;
+ if (h->multi_writer_support) {
+ h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
+ RTE_CACHE_LINE_SIZE);
+ if (h->readwrite_lock == NULL)
+ goto err_unlock;
+
+ rte_rwlock_init(h->readwrite_lock);
+ }
/* Populate free slots ring. Entry zero is reserved for key misses. */
- for (i = 1; i < params->entries + 1; i++)
+ for (i = 1; i < num_key_slots; i++)
rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
te->data = (void *) h;
@@ -335,11 +339,10 @@ rte_hash_free(struct rte_hash *h)
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
- if (h->hw_trans_mem_support)
+ if (h->multi_writer_support) {
rte_free(h->local_free_slots);
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_free(h->multiwriter_lock);
+ rte_free(h->readwrite_lock);
+ }
rte_ring_free(h->free_slots);
rte_free(h->key_store);
rte_free(h->buckets);
@@ -366,15 +369,78 @@ rte_hash_secondary_hash(const hash_sig_t primary_hash)
return primary_hash ^ ((tag + 1) * alt_bits_xor);
}
+int32_t
+rte_hash_count(const struct rte_hash *h)
+{
+ uint32_t tot_ring_cnt, cached_cnt = 0;
+ uint32_t i, ret;
+
+ if (h == NULL)
+ return -EINVAL;
+
+ if (h->multi_writer_support) {
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ cached_cnt += h->local_free_slots[i].len;
+
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
+ cached_cnt;
+ } else {
+ tot_ring_cnt = h->entries;
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots);
+ }
+ return ret;
+}
+
+/* Read write locks implemented using rte_rwlock */
+static inline void
+__hash_rw_writer_lock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_lock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_lock(h->readwrite_lock);
+}
+
+
+static inline void
+__hash_rw_reader_lock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_lock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_lock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_writer_unlock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_unlock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_unlock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_reader_unlock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_unlock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_unlock(h->readwrite_lock);
+}
+
void
rte_hash_reset(struct rte_hash *h)
{
void *ptr;
- unsigned i;
+ uint32_t tot_ring_cnt, i;
if (h == NULL)
return;
+ __hash_rw_writer_lock(h);
memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
@@ -383,97 +449,260 @@ rte_hash_reset(struct rte_hash *h)
rte_pause();
/* Repopulate the free slots ring. Entry zero is reserved for key misses */
- for (i = 1; i < h->entries + 1; i++)
+ if (h->multi_writer_support)
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ else
+ tot_ring_cnt = h->entries;
+
+ for (i = 1; i < tot_ring_cnt + 1; i++)
rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
/* Reset local caches per lcore */
for (i = 0; i < RTE_MAX_LCORE; i++)
h->local_free_slots[i].len = 0;
}
+ __hash_rw_writer_unlock(h);
}
-/* Search for an entry that can be pushed to its alternative location */
-static inline int
-make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
- unsigned int *nr_pushes)
+/*
+ * Function called to enqueue back an index in the cache/ring,
+ * as the slot has not been used and can be reused in the
+ * next addition attempt.
+ */
+static inline void
+enqueue_slot_back(const struct rte_hash *h,
+ struct lcore_cache *cached_free_slots,
+ void *slot_id)
{
- unsigned i, j;
- int ret;
- uint32_t next_bucket_idx;
- struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];
+ if (h->multi_writer_support) {
+ cached_free_slots->objs[cached_free_slots->len] = slot_id;
+ cached_free_slots->len++;
+ } else
+ rte_ring_sp_enqueue(h->free_slots, slot_id);
+}
+
+/* Search a bucket for the key and, if found, update its data */
+static inline int32_t
+search_and_update(const struct rte_hash *h, void *data, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
+{
+ int i;
+ struct rte_hash_key *k, *keys = h->key_store;
- /*
- * Push existing item (search for bucket with space in
- * alternative locations) to its alternative location
- */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Search for space in alternative locations */
- next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
- next_bkt[i] = &h->buckets[next_bucket_idx];
- for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
- if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
- break;
+ if (bkt->sig_current[i] == sig &&
+ bkt->sig_alt[i] == alt_hash) {
+ k = (struct rte_hash_key *) ((char *)keys +
+ bkt->key_idx[i] * h->key_entry_size);
+ if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+ /* Update data */
+ k->pdata = data;
+ /*
+ * Return index where key is stored,
+ * subtracting the first dummy index
+ */
+ return bkt->key_idx[i] - 1;
+ }
}
-
- if (j != RTE_HASH_BUCKET_ENTRIES)
- break;
}
+ return -1;
+}
- /* Alternative location has spare room (end of recursive function) */
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
- next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[j] = bkt->key_idx[i];
- return i;
+/* Only tries to insert at one bucket (@prim_bkt) without trying to push
+ * buckets around.
+ * Return 1 if it matches an existing key, 0 on success, or -1 if no
+ * empty entry is available.
+ */
+static inline int32_t
+rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *prim_bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ unsigned int i;
+ struct rte_hash_bucket *cur_bkt = prim_bkt;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+ /* Check if key was inserted after last check but before this
+ * protected region in case of inserting duplicated keys.
+ */
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
}
- /* Pick entry that has not been pushed yet */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
- if (bkt->flag[i] == 0)
+ /* Insert new entry if there is room in the primary
+ * bucket.
+ */
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ /* Check if slot is available */
+ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+ prim_bkt->sig_current[i] = sig;
+ prim_bkt->sig_alt[i] = alt_hash;
+ prim_bkt->key_idx[i] = new_idx;
break;
+ }
+ }
+ __hash_rw_writer_unlock(h);
- /* All entries have been pushed, so entry cannot be added */
- if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
- return -ENOSPC;
+ if (i != RTE_HASH_BUCKET_ENTRIES)
+ return 0;
- /* Set flag to indicate that this entry is going to be pushed */
- bkt->flag[i] = 1;
+ /* no empty entry */
+ return -1;
+}
- /* Need room in alternative bucket to insert the pushed entry */
- ret = make_space_bucket(h, next_bkt[i], nr_pushes);
- /*
- * After recursive function.
- * Clear flags and insert the pushed entry
- * in its alternative location if successful,
- * or return error
+/* Shift buckets along the provided cuckoo path (@leaf and @leaf_slot) and
+ * fill the path head with the new entry (sig, alt_hash, new_idx).
+ * Return 1 if a matching key was found, -1 if the cuckoo path was
+ * invalidated (failure), or 0 on success.
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *alt_bkt,
+ const struct rte_hash_key *key, void *data,
+ struct queue_node *leaf, uint32_t leaf_slot,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ uint32_t prev_alt_bkt_idx;
+ struct rte_hash_bucket *cur_bkt = bkt;
+ struct queue_node *prev_node, *curr_node = leaf;
+ struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+ uint32_t prev_slot, curr_slot = leaf_slot;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+
+ /* In case empty slot was gone before entering protected region */
+ if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Check if key was inserted after last check but before this
+ * protected region.
*/
- bkt->flag[i] = 0;
- if (ret >= 0) {
- next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
- next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
- return i;
- } else
- return ret;
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ while (likely(curr_node->prev != NULL)) {
+ prev_node = curr_node->prev;
+ prev_bkt = prev_node->bkt;
+ prev_slot = curr_node->prev_slot;
+
+ prev_alt_bkt_idx =
+ prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
+
+ if (unlikely(&h->buckets[prev_alt_bkt_idx]
+ != curr_bkt)) {
+ /* Revert the slot to empty, otherwise keys would be duplicated */
+ curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Need to swap current/alt sig to allow later
+ * Cuckoo insert to move elements back to its
+ * primary bucket if available
+ */
+ curr_bkt->sig_alt[curr_slot] =
+ prev_bkt->sig_current[prev_slot];
+ curr_bkt->sig_current[curr_slot] =
+ prev_bkt->sig_alt[prev_slot];
+ curr_bkt->key_idx[curr_slot] =
+ prev_bkt->key_idx[prev_slot];
+
+ curr_slot = prev_slot;
+ curr_node = prev_node;
+ curr_bkt = curr_node->bkt;
+ }
+
+ curr_bkt->sig_current[curr_slot] = sig;
+ curr_bkt->sig_alt[curr_slot] = alt_hash;
+ curr_bkt->key_idx[curr_slot] = new_idx;
+
+ __hash_rw_writer_unlock(h);
+
+ return 0;
}
/*
- * Function called to enqueue back an index in the cache/ring,
- * as slot has not being used and it can be used in the
- * next addition attempt.
+ * Make space for a new key, using a BFS cuckoo search and multi-writer
+ * safe cuckoo displacement.
*/
-static inline void
-enqueue_slot_back(const struct rte_hash *h,
- struct lcore_cache *cached_free_slots,
- void *slot_id)
+static inline int
+rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash,
+ uint32_t new_idx, int32_t *ret_val)
{
- if (h->hw_trans_mem_support) {
- cached_free_slots->objs[cached_free_slots->len] = slot_id;
- cached_free_slots->len++;
- } else
- rte_ring_sp_enqueue(h->free_slots, slot_id);
+ unsigned int i;
+ struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+ struct queue_node *tail, *head;
+ struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+ tail = queue;
+ head = queue + 1;
+ tail->bkt = bkt;
+ tail->prev = NULL;
+ tail->prev_slot = -1;
+
+ /* Cuckoo BFS search */
+ while (likely(tail != head && head <
+ queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
+ RTE_HASH_BUCKET_ENTRIES)) {
+ curr_bkt = tail->bkt;
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
+ int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
+ bkt, sec_bkt, key, data,
+ tail, i, sig, alt_hash,
+ new_idx, ret_val);
+ if (likely(ret != -1))
+ return ret;
+ }
+
+ /* Enqueue new node and keep prev node info */
+ alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
+ & h->bucket_bitmask]);
+ head->bkt = alt_bkt;
+ head->prev = tail;
+ head->prev_slot = i;
+ head++;
+ }
+ tail++;
+ }
+
+ return -ENOSPC;
}
static inline int32_t
@@ -482,19 +711,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
{
hash_sig_t alt_hash;
uint32_t prim_bucket_idx, sec_bucket_idx;
- unsigned i;
struct rte_hash_bucket *prim_bkt, *sec_bkt;
- struct rte_hash_key *new_k, *k, *keys = h->key_store;
+ struct rte_hash_key *new_k, *keys = h->key_store;
void *slot_id = NULL;
uint32_t new_idx;
int ret;
unsigned n_slots;
unsigned lcore_id;
struct lcore_cache *cached_free_slots = NULL;
- unsigned int nr_pushes = 0;
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_lock(h->multiwriter_lock);
+ int32_t ret_val;
prim_bucket_idx = sig & h->bucket_bitmask;
prim_bkt = &h->buckets[prim_bucket_idx];
@@ -505,8 +730,24 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
sec_bkt = &h->buckets[sec_bucket_idx];
rte_prefetch0(sec_bkt);
- /* Get a new slot for storing the new key */
- if (h->hw_trans_mem_support) {
+ /* Check if key is already inserted in primary location */
+ __hash_rw_writer_lock(h);
+ ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+
+ /* Check if key is already inserted in secondary location */
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+ __hash_rw_writer_unlock(h);
+
+ /* Did not find a match, so get a new slot for storing the new key */
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Try to get a free slot from the local cache */
@@ -516,8 +757,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
if (n_slots == 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
cached_free_slots->len += n_slots;
@@ -528,124 +768,50 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
slot_id = cached_free_slots->objs[cached_free_slots->len];
} else {
if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
}
new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
- rte_prefetch0(new_k);
new_idx = (uint32_t)((uintptr_t) slot_id);
-
- /* Check if key is already inserted in primary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (prim_bkt->sig_current[i] == sig &&
- prim_bkt->sig_alt[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- prim_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = prim_bkt->key_idx[i] - 1;
- goto failure;
- }
- }
- }
-
- /* Check if key is already inserted in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (sec_bkt->sig_alt[i] == sig &&
- sec_bkt->sig_current[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- sec_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = sec_bkt->key_idx[i] - 1;
- goto failure;
- }
- }
- }
-
/* Copy key */
rte_memcpy(new_k->key, key, h->key_len);
new_k->pdata = data;
-#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
- if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
- ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
- sig, alt_hash, new_idx);
- if (ret >= 0)
- return new_idx - 1;
- /* Primary bucket full, need to make space for new entry */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
- alt_hash, new_idx);
+ /* Find an empty slot and insert */
+ ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- if (ret >= 0)
- return new_idx - 1;
+ /* Primary bucket full, need to make space for new entry */
+ ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- /* Also search secondary bucket to get better occupancy */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
- alt_hash, new_idx);
+ /* Also search secondary bucket to get better occupancy */
+ ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
+ alt_hash, sig, new_idx, &ret_val);
- if (ret >= 0)
- return new_idx - 1;
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
} else {
-#endif
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
-
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-
- /* Primary bucket full, need to make space for new entry
- * After recursive function.
- * Insert the new entry in the position of the pushed entry
- * if successful or return error and
- * store the new slot back in the ring
- */
- ret = make_space_bucket(h, prim_bkt, &nr_pushes);
- if (ret >= 0) {
- prim_bkt->sig_current[ret] = sig;
- prim_bkt->sig_alt[ret] = alt_hash;
- prim_bkt->key_idx[ret] = new_idx;
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-#if defined(RTE_ARCH_X86)
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret;
}
-#endif
- /* Error in addition, store new slot back in the ring and return error */
- enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
-
-failure:
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return ret;
}
int32_t
@@ -690,20 +856,15 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
else
return ret;
}
+
+/* Search one bucket to find the matching key */
static inline int32_t
-__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig, void **data)
+search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
+ void **data, const struct rte_hash_bucket *bkt)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
+ int i;
struct rte_hash_key *k, *keys = h->key_store;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
- /* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
bkt->key_idx[i] != EMPTY_SLOT) {
@@ -720,30 +881,41 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig, void **data)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+ __hash_rw_reader_lock(h);
+
+ /* Check if key is in primary location */
+ ret = search_one_bucket(h, key, sig, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
/* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->sig_alt[i] == sig) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- if (data != NULL)
- *data = k->pdata;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- return bkt->key_idx[i] - 1;
- }
- }
+ ret = search_one_bucket(h, key, alt_hash, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
}
-
+ __hash_rw_reader_unlock(h);
return -ENOENT;
}
@@ -785,7 +957,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
bkt->sig_current[i] = NULL_SIGNATURE;
bkt->sig_alt[i] = NULL_SIGNATURE;
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Cache full, need to free it. */
@@ -806,20 +978,15 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
}
}
+/* Search one bucket and remove the matching key */
static inline int32_t
-__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig)
+search_and_remove(const struct rte_hash *h, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
struct rte_hash_key *k, *keys = h->key_store;
+ unsigned int i;
int32_t ret;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
/* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
@@ -839,32 +1006,42 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int32_t ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+
+ __hash_rw_writer_lock(h);
+ /* look for key in primary bucket */
+ ret = search_and_remove(h, key, bkt, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
- /* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->key_idx[i] != EMPTY_SLOT) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- remove_entry(h, bkt, i);
-
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = bkt->key_idx[i] - 1;
- bkt->key_idx[i] = EMPTY_SLOT;
- return ret;
- }
- }
+ /* look for key in secondary bucket */
+ ret = search_and_remove(h, key, bkt, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
}
+ __hash_rw_writer_unlock(h);
return -ENOENT;
}
@@ -1006,6 +1183,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
rte_prefetch0(secondary_bkt[i]);
}
+ __hash_rw_reader_lock(h);
/* Compare signatures and prefetch key slot of first hit */
for (i = 0; i < num_keys; i++) {
compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
@@ -1088,6 +1266,8 @@ next_key:
continue;
}
+ __hash_rw_reader_unlock(h);
+
if (hit_mask != NULL)
*hit_mask = hits;
}
@@ -1146,7 +1326,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
idx = *next % RTE_HASH_BUCKET_ENTRIES;
}
-
+ __hash_rw_reader_lock(h);
/* Get position of entry in key table */
position = h->buckets[bucket_idx].key_idx[idx];
next_key = (struct rte_hash_key *) ((char *)h->key_store +
@@ -1155,6 +1335,8 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
*key = next_key->key;
*data = next_key->pdata;
+ __hash_rw_reader_unlock(h);
+
/* Increment iterator */
(*next)++;
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 7a54e555..b43f467d 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -88,15 +88,14 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
#endif
-enum add_key_case {
- ADD_KEY_SINGLEWRITER = 0,
- ADD_KEY_MULTIWRITER,
- ADD_KEY_MULTIWRITER_TM,
-};
/** Number of items per bucket. */
#define RTE_HASH_BUCKET_ENTRIES 8
+#if !RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES)
+#error RTE_HASH_BUCKET_ENTRIES must be a power of 2
+#endif
+
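
The new compile-time guard matters because the iterator and bulk-lookup paths divide and take the remainder by RTE_HASH_BUCKET_ENTRIES; with a power of two the compiler can reduce both to a shift and a mask. An illustrative sketch, not part of the patch:

    /* with RTE_HASH_BUCKET_ENTRIES == 8, as defined above: */
    uint32_t bucket_idx = next / RTE_HASH_BUCKET_ENTRIES; /* next >> 3 */
    uint32_t slot_idx   = next % RTE_HASH_BUCKET_ENTRIES; /* next & 0x7 */
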
#define NULL_SIGNATURE 0
#define EMPTY_SLOT 0
@@ -155,18 +154,20 @@ struct rte_hash {
struct rte_ring *free_slots;
/**< Ring that stores all indexes of the free slots in the key table */
- uint8_t hw_trans_mem_support;
- /**< Hardware transactional memory support */
+
struct lcore_cache *local_free_slots;
/**< Local cache per lcore, storing some indexes of the free slots */
- enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
- rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
/* Fields used in lookup */
uint32_t key_len __rte_cache_aligned;
/**< Length of hash key. */
+ uint8_t hw_trans_mem_support;
+ /**< If hardware transactional memory is used. */
+ uint8_t multi_writer_support;
+ /**< If multi-writer support is enabled. */
+ uint8_t readwrite_concur_support;
+ /**< If read-write concurrency support is enabled */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
@@ -184,6 +185,7 @@ struct rte_hash {
/**< Table with buckets storing all the hash values and key indexes
* to the key table.
*/
+ rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
} __rte_cache_aligned;
struct queue_node {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
deleted file mode 100644
index 2c5b017e..00000000
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016 Intel Corporation
- */
-
-/* rte_cuckoo_hash_x86.h
- * This file holds all x86 specific Cuckoo Hash functions
- */
-
-/* Only tries to insert at one bucket (@prim_bkt) without trying to push
- * buckets around
- */
-static inline unsigned
-rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned i, status;
- unsigned try = 0;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- /* Insert new entry if there is room in the primary
- * bucket.
- */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
- rte_xend();
-
- if (i != RTE_HASH_BUCKET_ENTRIES)
- return 0;
-
- break; /* break off try loop if transaction commits */
- } else {
- /* If we abort we give up this cuckoo path. */
- try++;
- rte_pause();
- }
- }
-
- return -1;
-}
-
-/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
- * the path head with new entry (sig, alt_hash, new_idx)
- */
-static inline int
-rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
- struct queue_node *leaf, uint32_t leaf_slot,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned try = 0;
- unsigned status;
- uint32_t prev_alt_bkt_idx;
-
- struct queue_node *prev_node, *curr_node = leaf;
- struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
- uint32_t prev_slot, curr_slot = leaf_slot;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- while (likely(curr_node->prev != NULL)) {
- prev_node = curr_node->prev;
- prev_bkt = prev_node->bkt;
- prev_slot = curr_node->prev_slot;
-
- prev_alt_bkt_idx
- = prev_bkt->sig_alt[prev_slot]
- & h->bucket_bitmask;
-
- if (unlikely(&h->buckets[prev_alt_bkt_idx]
- != curr_bkt)) {
- rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
- }
-
- /* Need to swap current/alt sig to allow later
- * Cuckoo insert to move elements back to its
- * primary bucket if available
- */
- curr_bkt->sig_alt[curr_slot] =
- prev_bkt->sig_current[prev_slot];
- curr_bkt->sig_current[curr_slot] =
- prev_bkt->sig_alt[prev_slot];
- curr_bkt->key_idx[curr_slot]
- = prev_bkt->key_idx[prev_slot];
-
- curr_slot = prev_slot;
- curr_node = prev_node;
- curr_bkt = curr_node->bkt;
- }
-
- curr_bkt->sig_current[curr_slot] = sig;
- curr_bkt->sig_alt[curr_slot] = alt_hash;
- curr_bkt->key_idx[curr_slot] = new_idx;
-
- rte_xend();
-
- return 0;
- }
-
- /* If we abort we give up this cuckoo path, since most likely it's
- * no longer valid as TSX detected data conflict
- */
- try++;
- rte_pause();
- }
-
- return -1;
-}
-
-/*
- * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
- * Cuckoo
- */
-static inline int
-rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
- struct rte_hash_bucket *bkt,
- hash_sig_t sig, hash_sig_t alt_hash,
- uint32_t new_idx)
-{
- unsigned i;
- struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
- struct queue_node *tail, *head;
- struct rte_hash_bucket *curr_bkt, *alt_bkt;
-
- tail = queue;
- head = queue + 1;
- tail->bkt = bkt;
- tail->prev = NULL;
- tail->prev_slot = -1;
-
- /* Cuckoo bfs Search */
- while (likely(tail != head && head <
- queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
- RTE_HASH_BUCKET_ENTRIES)) {
- curr_bkt = tail->bkt;
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
- if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
- tail, i, sig,
- alt_hash, new_idx) == 0))
- return 0;
- }
-
- /* Enqueue new node and keep prev node info */
- alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
- & h->bucket_bitmask]);
- head->bkt = alt_bkt;
- head->prev = tail;
- head->prev_slot = i;
- head++;
- }
- tail++;
- }
-
- return -ENOSPC;
-}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index f71ca9fb..9e7d9315 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -34,6 +34,9 @@ extern "C" {
/** Default behavior of insertion, single writer/multi writer */
#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+/** Flag to support reader writer concurrency */
+#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
+
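
A minimal creation sketch using the new flag; the table name, sizes, and hash function below are illustrative assumptions, not part of this patch:

    #include <rte_hash.h>
    #include <rte_jhash.h>

    struct rte_hash_parameters params = {
        .name = "rw_table",              /* hypothetical name */
        .entries = 1024,
        .key_len = sizeof(uint32_t),
        .hash_func = rte_jhash,
        .hash_func_init_val = 0,
        .socket_id = 0,
        .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
                      RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD,
    };
    struct rte_hash *h = rte_hash_create(&params);
    /* h == NULL on failure, with rte_errno set */
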
/** Signature of key that is stored internally. */
typedef uint32_t hash_sig_t;
@@ -124,9 +127,22 @@ void
rte_hash_reset(struct rte_hash *h);
/**
+ * Return the number of keys in the hash table.
+ * @param h
+ * Hash table to query.
+ * @return
+ * - -EINVAL if parameters are invalid
+ * - The number of keys currently stored in the table.
+ */
+int32_t
+rte_hash_count(const struct rte_hash *h);
+
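
Usage is a single call; a short sketch assuming a valid table handle h (printf is illustrative only):

    int32_t used = rte_hash_count(h);

    if (used < 0)
        ; /* -EINVAL: h was NULL */
    else
        printf("keys currently stored: %d\n", used);
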
+/**
* Add a key-value pair to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -146,7 +162,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data);
* Add a key-value pair with a pre-computed hash value
* to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -167,7 +185,9 @@ rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Add a key to an existing hash table. This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -185,7 +205,9 @@ rte_hash_add_key(const struct rte_hash *h, const void *key);
/**
* Add a key to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -205,7 +227,9 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -224,7 +248,9 @@ rte_hash_del_key(const struct rte_hash *h, const void *key);
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -244,7 +270,9 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Find a key in the hash table given the position.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to get the key from.
@@ -254,8 +282,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
* Output containing a pointer to the key
* @return
* - 0 if retrieved successfully
- * - EINVAL if the parameters are invalid.
- * - ENOENT if no valid key is found in the given position.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if no valid key is found in the given position.
*/
int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
@@ -263,7 +291,9 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
/**
* Find a key-value pair in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -272,9 +302,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
@@ -282,7 +314,9 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
/**
* Find a key-value pair with a pre-computed hash value
* to an existing hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -293,9 +327,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
@@ -303,7 +339,9 @@ rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -321,7 +359,9 @@ rte_hash_lookup(const struct rte_hash *h, const void *key);
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -356,7 +396,9 @@ rte_hash_hash(const struct rte_hash *h, const void *key);
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -377,7 +419,9 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 52a2576f..e216ac8e 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -45,3 +45,11 @@ DPDK_16.07 {
rte_hash_get_key_with_position;
} DPDK_2.2;
+
+
+DPDK_18.08 {
+ global:
+
+ rte_hash_count;
+
+} DPDK_16.07;
diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
index c4b21961..a738a033 100644
--- a/lib/librte_kni/meson.build
+++ b/lib/librte_kni/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-if host_machine.system() != 'linux'
+if host_machine.system() != 'linux' or cc.sizeof('void *') == 4
build = false
endif
version = 2
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 8a8f6c1c..65f6a2b0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -715,6 +715,9 @@ rte_kni_get(const char *name)
struct rte_kni_memzone_slot *it;
struct rte_kni *kni;
+ if (name == NULL || name[0] == '\0')
+ return NULL;
+
/* Note: could be improved perf-wise if necessary */
for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
it = &kni_memzone_pool.slots[i];
diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
index 39d5ac33..87593954 100644
--- a/lib/librte_kvargs/Makefile
+++ b/lib/librte_kvargs/Makefile
@@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kvargs.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-LDLIBS += -lrte_eal
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
EXPORT_MAP := rte_kvargs_version.map
diff --git a/lib/librte_kvargs/meson.build b/lib/librte_kvargs/meson.build
index 0c5b9cb2..acd3e543 100644
--- a/lib/librte_kvargs/meson.build
+++ b/lib/librte_kvargs/meson.build
@@ -1,6 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+includes = [global_inc]
+includes += include_directories('../librte_eal/common/include')
+
version = 1
sources = files('rte_kvargs.c')
headers = files('rte_kvargs.h')
+
+deps += 'compat'
diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c
index d92a5f9d..a28f7694 100644
--- a/lib/librte_kvargs/rte_kvargs.c
+++ b/lib/librte_kvargs/rte_kvargs.c
@@ -6,7 +6,6 @@
#include <string.h>
#include <stdlib.h>
-#include <rte_log.h>
#include <rte_string_fns.h>
#include "rte_kvargs.h"
@@ -28,29 +27,22 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
* to pass to rte_strsplit
*/
kvlist->str = strdup(params);
- if (kvlist->str == NULL) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n");
+ if (kvlist->str == NULL)
return -1;
- }
/* browse each key/value pair and add it in kvlist */
str = kvlist->str;
while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) {
i = kvlist->count;
- if (i >= RTE_KVARGS_MAX) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n");
+ if (i >= RTE_KVARGS_MAX)
return -1;
- }
kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2);
kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2);
- if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) {
- RTE_LOG(ERR, PMD,
- "Cannot parse arguments: wrong key or value\n"
- "params=<%s>\n", params);
+ if (kvlist->pairs[i].key == NULL ||
+ kvlist->pairs[i].value == NULL)
return -1;
- }
kvlist->count++;
str = NULL;
@@ -89,12 +81,8 @@ check_for_valid_keys(struct rte_kvargs *kvlist,
for (i = 0; i < kvlist->count; i++) {
pair = &kvlist->pairs[i];
ret = is_valid_key(valid, pair->key);
- if (!ret) {
- RTE_LOG(ERR, PMD,
- "Error parsing device, invalid key <%s>\n",
- pair->key);
+ if (!ret)
return -1;
- }
}
return 0;
}
@@ -180,3 +168,38 @@ rte_kvargs_parse(const char *args, const char * const valid_keys[])
return kvlist;
}
+
+__rte_experimental
+struct rte_kvargs *
+rte_kvargs_parse_delim(const char *args, const char * const valid_keys[],
+ const char *valid_ends)
+{
+ struct rte_kvargs *kvlist = NULL;
+ char *copy;
+ size_t len;
+
+ if (valid_ends == NULL)
+ return rte_kvargs_parse(args, valid_keys);
+
+ copy = strdup(args);
+ if (copy == NULL)
+ return NULL;
+
+ len = strcspn(copy, valid_ends);
+ copy[len] = '\0';
+
+ kvlist = rte_kvargs_parse(copy, valid_keys);
+
+ free(copy);
+ return kvlist;
+}
+
+__rte_experimental
+int
+rte_kvargs_strcmp(const char *key __rte_unused,
+ const char *value, void *opaque)
+{
+ const char *str = opaque;
+
+ return -abs(strcmp(str, value));
+}
diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h
index 51b8120b..fc041956 100644
--- a/lib/librte_kvargs/rte_kvargs.h
+++ b/lib/librte_kvargs/rte_kvargs.h
@@ -25,6 +25,8 @@
extern "C" {
#endif
+#include <rte_compat.h>
+
/** Maximum number of key/value associations */
#define RTE_KVARGS_MAX 32
@@ -72,6 +74,36 @@ struct rte_kvargs *rte_kvargs_parse(const char *args,
const char *const valid_keys[]);
/**
+ * Allocate a rte_kvargs and store key/value associations from a string.
+ * This version will consider any byte from valid_ends as a possible
+ * terminating character, and will not parse beyond the first
+ * occurrence of any of them.
+ *
+ * The function allocates and fills an rte_kvargs structure from a given
+ * string whose format is key1=value1,key2=value2,...
+ *
+ * The structure can be freed with rte_kvargs_free().
+ *
+ * @param args
+ * The input string containing the key/value associations
+ *
+ * @param valid_keys
+ * A list of valid keys (table of const char *, the last must be NULL).
+ * This argument is ignored if NULL
+ *
+ * @param valid_ends
+ * Acceptable terminating characters.
+ * If NULL, the behavior is the same as ``rte_kvargs_parse``.
+ *
+ * @return
+ * - A pointer to an allocated rte_kvargs structure on success
+ * - NULL on error
+ */
+__rte_experimental
+struct rte_kvargs *rte_kvargs_parse_delim(const char *args,
+ const char *const valid_keys[],
+ const char *valid_ends);
+
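
A usage sketch for the new delimited parser; the argument string, key names, and delimiter below are hypothetical:

    static const char * const keys[] = { "iface", "queues", NULL };
    struct rte_kvargs *kv;

    /* stop at the first '/', so "extra" is never parsed */
    kv = rte_kvargs_parse_delim("iface=net0,queues=4/extra", keys, "/");
    if (kv != NULL) {
        /* kv holds iface=net0 and queues=4 */
        rte_kvargs_free(kv);
    }
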
+/**
* Free a rte_kvargs structure
*
* Free a rte_kvargs structure previously allocated with
@@ -121,6 +153,32 @@ int rte_kvargs_process(const struct rte_kvargs *kvlist,
unsigned rte_kvargs_count(const struct rte_kvargs *kvlist,
const char *key_match);
+/**
+ * Generic kvarg handler for string comparison.
+ *
+ * This function can be used for a generic string comparison processing
+ * on a list of kvargs.
+ *
+ * @param key
+ * kvarg pair key.
+ *
+ * @param value
+ * kvarg pair value.
+ *
+ * @param opaque
+ * Opaque pointer to a string.
+ *
+ * @return
+ * 0 if the strings match.
+ * !0 otherwise or on error.
+ *
+ * Unlike strcmp, comparison ordering is not kept.
+ * In order for rte_kvargs_process to stop processing on a match error,
+ * a negative value is returned even where strcmp would have returned a
+ * positive one.
+ */
+__rte_experimental
+int rte_kvargs_strcmp(const char *key, const char *value, void *opaque);
+
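
A sketch of the intended pairing with rte_kvargs_process(); kvlist is assumed to come from rte_kvargs_parse(), and the key name and expected value are assumptions:

    char expected[] = "eth";

    /* returns 0 only if every "class" value in kvlist equals "eth" */
    int ret = rte_kvargs_process(kvlist, "class",
                                 rte_kvargs_strcmp, expected);
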
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map
index 2030ec46..8f4b4e3f 100644
--- a/lib/librte_kvargs/rte_kvargs_version.map
+++ b/lib/librte_kvargs/rte_kvargs_version.map
@@ -8,3 +8,11 @@ DPDK_2.0 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ rte_kvargs_parse_delim;
+ rte_kvargs_strcmp;
+
+} DPDK_2.0;
diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c
index 46c69bf0..1fdec68e 100644
--- a/lib/librte_latencystats/rte_latencystats.c
+++ b/lib/librte_latencystats/rte_latencystats.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2017 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
*/
#include <unistd.h>
@@ -265,6 +265,7 @@ rte_latencystats_uninit(void)
uint16_t qid;
int ret = 0;
struct rxtx_cbs *cbs = NULL;
+ const struct rte_memzone *mz = NULL;
/** De register Rx/Tx callbacks */
RTE_ETH_FOREACH_DEV(pid) {
@@ -288,6 +289,11 @@ rte_latencystats_uninit(void)
}
}
+ /* free up the memzone */
+ mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
+ if (mz)
+ rte_memzone_free(mz);
+
return 0;
}
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 8749a00f..e2b98a25 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -6,7 +6,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_mbuf.a
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lrte_eal -lrte_mempool
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 869c17c1..45ffb0db 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,7 +2,6 @@
# Copyright(c) 2017 Intel Corporation
version = 3
-allow_experimental_apis = true
sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
deps += ['mempool']
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index fca580ef..e714c5a5 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -107,7 +107,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
}
/* Helper to create a mbuf pool with given mempool ops name*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 8e6b4d29..9ce5d76d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -729,6 +729,24 @@ rte_mbuf_to_baddr(struct rte_mbuf *md)
}
/**
+ * Return the starting address of the private data area embedded in
+ * the given mbuf.
+ *
+ * Note that no check is made to ensure that a private data area
+ * actually exists in the supplied mbuf.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @return
+ * The starting address of the private data area of the given mbuf.
+ */
+static inline void * __rte_experimental
+rte_mbuf_to_priv(struct rte_mbuf *m)
+{
+ return RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
+}
+
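
A minimal sketch of the private-area accessor; struct pkt_meta is hypothetical, and the mbuf pool must have been created with priv_size >= sizeof(struct pkt_meta):

    struct pkt_meta {           /* hypothetical per-packet metadata */
        uint64_t rx_tsc;
    };

    static inline void
    tag_mbuf(struct rte_mbuf *m)
    {
        struct pkt_meta *meta = rte_mbuf_to_priv(m);

        meta->rx_tsc = rte_rdtsc(); /* from rte_cycles.h */
    }
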
+/**
* Returns TRUE if given mbuf is cloned by mbuf indirection, or FALSE
* otherwise.
*
@@ -1146,7 +1164,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
* - EEXIST - a memzone with the same name already exists
* - ENOMEM - no appropriate memory area found in which to create memzone
*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name);
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c b/lib/librte_mbuf/rte_mbuf_pool_ops.c
index a1d4699f..5722976f 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.c
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c
@@ -9,7 +9,7 @@
#include <rte_errno.h>
#include <rte_mbuf_pool_ops.h>
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -35,7 +35,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name)
return -EEXIST;
}
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -46,7 +46,7 @@ rte_mbuf_platform_mempool_ops(void)
return mz->addr;
}
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -67,7 +67,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name)
}
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -79,7 +79,7 @@ rte_mbuf_user_mempool_ops(void)
}
/* Return mbuf pool ops name */
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void)
{
/* User defined mempool ops takes the priority */
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.h b/lib/librte_mbuf/rte_mbuf_pool_ops.h
index ebf5bf0f..7ed95a49 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.h
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.h
@@ -12,9 +12,6 @@
* These APIs are for configuring the mbuf pool ops names to be largely used by
* rte_pktmbuf_pool_create(). However, this can also be used to set and inquire
* the best mempool ops available.
- *
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
*/
#include <rte_compat.h>
@@ -34,7 +31,7 @@ extern "C" {
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name);
/**
@@ -46,7 +43,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name);
* - On success, platform pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void);
/**
@@ -60,7 +57,7 @@ rte_mbuf_platform_mempool_ops(void);
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name);
/**
@@ -72,7 +69,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name);
 * - On success, user pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void);
/**
@@ -87,7 +84,7 @@ rte_mbuf_user_mempool_ops(void);
* @return
 * Returns the preferred mbuf pool ops name.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void);
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index 79ea3142..01acc66e 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -653,9 +653,9 @@ extern "C" {
#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPv6 packet.
*/
#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index 1bb9538d..cae68db8 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -35,7 +35,7 @@ DPDK_16.11 {
} DPDK_2.1;
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_mbuf_best_mempool_ops;
@@ -44,4 +44,4 @@ EXPERIMENTAL {
rte_mbuf_set_user_mempool_ops;
rte_mbuf_user_mempool_ops;
rte_pktmbuf_pool_create_by_ops;
-};
+} DPDK_16.11;
diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c
index e147dd1f..702c01d3 100644
--- a/lib/librte_member/rte_member.c
+++ b/lib/librte_member/rte_member.c
@@ -297,10 +297,7 @@ rte_member_reset(const struct rte_member_setsum *setsum)
}
}
-RTE_INIT(librte_member_init_log);
-
-static void
-librte_member_init_log(void)
+RTE_INIT(librte_member_init_log)
{
librte_member_logtype = rte_log_register("lib.member");
if (librte_member_logtype >= 0)
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index e3c32b14..20bf63fb 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -7,15 +7,12 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_mempool.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-# Allow deprecated symbol to use deprecated rte_mempool_populate_iova_tab()
-# from earlier deprecated rte_mempool_populate_phys_tab()
-CFLAGS += -Wno-deprecated-declarations
CFLAGS += -DALLOW_EXPERIMENTAL_API
LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 4
+LIBABIVER := 5
# memseg walk is not yet part of stable API
CFLAGS += -DALLOW_EXPERIMENTAL_API
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index d507e551..38d7ae89 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -5,17 +5,13 @@ allow_experimental_apis = true
extra_flags = []
-# Allow deprecated symbol to use deprecated rte_mempool_populate_iova_tab()
-# from earlier deprecated rte_mempool_populate_phys_tab()
-extra_flags += '-Wno-deprecated-declarations'
-
foreach flag: extra_flags
if cc.has_argument(flag)
cflags += flag
endif
endforeach
-version = 4
+version = 5
sources = files('rte_mempool.c', 'rte_mempool_ops.c',
'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 8c8b9f80..03e6b5f7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -225,93 +225,6 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}
-
-/*
- * Internal function to calculate required memory chunk size shared
- * by default implementation of the corresponding callback and
- * deprecated external function.
- */
-size_t
-rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift)
-{
- size_t obj_per_page, pg_num, pg_sz;
-
- if (total_elt_sz == 0)
- return 0;
-
- if (pg_shift == 0)
- return total_elt_sz * elt_num;
-
- pg_sz = (size_t)1 << pg_shift;
- obj_per_page = pg_sz / total_elt_sz;
- if (obj_per_page == 0)
- return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;
-
- pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
- return pg_num << pg_shift;
-}
-
-/*
- * Calculate maximum amount of memory required to store given number of objects.
- */
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- __rte_unused unsigned int flags)
-{
- return rte_mempool_calc_mem_size_helper(elt_num, total_elt_sz,
- pg_shift);
-}
-
-/*
- * Calculate how much memory would be actually required with the
- * given memory footprint to store required number of elements.
- */
-ssize_t
-rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, __rte_unused unsigned int flags)
-{
- uint32_t elt_cnt = 0;
- rte_iova_t start, end;
- uint32_t iova_idx;
- size_t pg_sz = (size_t)1 << pg_shift;
-
- /* if iova is NULL, assume contiguous memory */
- if (iova == NULL) {
- start = 0;
- end = pg_sz * pg_num;
- iova_idx = pg_num;
- } else {
- start = iova[0];
- end = iova[0] + pg_sz;
- iova_idx = 1;
- }
- while (elt_cnt < elt_num) {
-
- if (end - start >= total_elt_sz) {
- /* enough contiguous memory, add an object */
- start += total_elt_sz;
- elt_cnt++;
- } else if (iova_idx < pg_num) {
- /* no room to store one obj, add a page */
- if (end == iova[iova_idx]) {
- end += pg_sz;
- } else {
- start = iova[iova_idx];
- end = iova[iova_idx] + pg_sz;
- }
- iova_idx++;
-
- } else {
- /* no more page, return how many elements fit */
- return -(size_t)elt_cnt;
- }
- }
-
- return (size_t)iova_idx << pg_shift;
-}
-
/* free a memchunk allocated with rte_memzone_reserve() */
static void
rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
@@ -423,63 +336,6 @@ fail:
return ret;
}
-int
-rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque)
-{
- return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque);
-}
-
-/* Add objects in the pool, using a table of physical pages. Return the
- * number of objects added, or a negative value on error.
- */
-int
-rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- uint32_t i, n;
- int ret, cnt = 0;
- size_t pg_sz = (size_t)1 << pg_shift;
-
- /* mempool must not be populated */
- if (mp->nb_mem_chunks != 0)
- return -EEXIST;
-
- if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
- return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA,
- pg_num * pg_sz, free_cb, opaque);
-
- for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {
-
- /* populate with the largest group of contiguous pages */
- for (n = 1; (i + n) < pg_num &&
- iova[i + n - 1] + pg_sz == iova[i + n]; n++)
- ;
-
- ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz,
- iova[i], n * pg_sz, free_cb, opaque);
- if (ret < 0) {
- rte_mempool_free_memchunks(mp);
- return ret;
- }
- /* no need to call the free callback for next chunks */
- free_cb = NULL;
- cnt += ret;
- }
- return cnt;
-}
-
-int
-rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift,
- free_cb, opaque);
-}
-
/* Populate the mempool with a virtual area. Return the number of
* objects added, or a negative value on error.
*/
@@ -916,6 +772,12 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+ /* asked for zero items */
+ if (n == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
/* asked cache too big */
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
@@ -1065,66 +927,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
return NULL;
}
-/*
- * Create the mempool over already allocated chunk of memory.
- * That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create()
- * behavior.
- */
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift)
-{
- struct rte_mempool *mp = NULL;
- int ret;
-
- /* no virtual address supplied, use rte_mempool_create() */
- if (vaddr == NULL)
- return rte_mempool_create(name, n, elt_size, cache_size,
- private_data_size, mp_init, mp_init_arg,
- obj_init, obj_init_arg, socket_id, flags);
-
- /* check that we have both VA and PA */
- if (iova == NULL) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- /* Check that pg_shift parameter is valid. */
- if (pg_shift > MEMPOOL_PG_SHIFT_MAX) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
- private_data_size, socket_id, flags);
- if (mp == NULL)
- return NULL;
-
- /* call the mempool priv initializer */
- if (mp_init)
- mp_init(mp, mp_init_arg);
-
- ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift,
- NULL, NULL);
- if (ret < 0 || ret != (int)mp->size)
- goto fail;
-
- /* call the object initializers */
- if (obj_init)
- rte_mempool_obj_iter(mp, obj_init, obj_init_arg);
-
- return mp;
-
- fail:
- rte_mempool_free(mp);
- return NULL;
-}
-
/* Return the number of entries in the mempool */
unsigned int
rte_mempool_avail_count(const struct rte_mempool *mp)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 1f59553b..7c9cd9a2 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -488,28 +488,6 @@ ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align);
/**
- * @internal Helper function to calculate memory size required to store
- * specified number of objects in assumption that the memory buffer will
- * be aligned at page boundary.
- *
- * Note that if object size is bigger than page size, then it assumes
- * that pages are grouped in subsets of physically continuous pages big
- * enough to store at least one object.
- *
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param pg_shift
- * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
- * @return
- * Required memory size aligned at page boundary.
- */
-size_t rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift);
-
-/**
* Function to be called for each populated object.
*
* @param[in] mp
@@ -974,74 +952,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
int socket_id, unsigned flags);
/**
- * @deprecated
- * Create a new mempool named *name* in memory.
- *
- * The pool contains n elements of elt_size. Its size is set to n.
- * This function uses ``memzone_reserve()`` to allocate the mempool header
- * (and the objects if vaddr is NULL).
- * Depending on the input parameters, mempool elements can be either allocated
- * together with the mempool header, or an externally provided memory buffer
- * could be used to store mempool objects. In later case, that external
- * memory buffer can consist of set of disjoint physical pages.
- *
- * @param name
- * The name of the mempool.
- * @param n
- * The number of elements in the mempool. The optimum size (in terms of
- * memory usage) for a mempool is when n is a power of two minus one:
- * n = (2^q - 1).
- * @param elt_size
- * The size of each element.
- * @param cache_size
- * Size of the cache. See rte_mempool_create() for details.
- * @param private_data_size
- * The size of the private data appended after the mempool
- * structure. This is useful for storing some private data after the
- * mempool structure, as is done for rte_mbuf_pool for example.
- * @param mp_init
- * A function pointer that is called for initialization of the pool,
- * before object initialization. The user can initialize the private
- * data in this function if needed. This parameter can be NULL if
- * not needed.
- * @param mp_init_arg
- * An opaque pointer to data that can be used in the mempool
- * constructor function.
- * @param obj_init
- * A function called for each object at initialization of the pool.
- * See rte_mempool_create() for details.
- * @param obj_init_arg
- * An opaque pointer passed to the object constructor function.
- * @param socket_id
- * The *socket_id* argument is the socket identifier in the case of
- * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
- * constraint for the reserved zone.
- * @param flags
- * Flags controlling the behavior of the mempool. See
- * rte_mempool_create() for details.
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @return
- * The pointer to the new allocated mempool, on success. NULL on error
- * with rte_errno set appropriately. See rte_mempool_create() for details.
- */
-__rte_deprecated
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift);
-
-/**
* Create an empty mempool
*
* The mempool is allocated and initialized, but it is not populated: no
@@ -1123,48 +1033,6 @@ int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque);
-__rte_deprecated
-int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque);
-
-/**
- * @deprecated
- * Add physical memory for objects in the pool at init
- *
- * Add a virtually contiguous memory chunk in the pool where objects can
- * be instantiated. The IO addresses corresponding to the virtual
- * area are described in iova[], pg_num, pg_shift.
- *
- * @param mp
- * A pointer to the mempool structure.
- * @param vaddr
- * The virtual address of memory that should be used to store objects.
- * @param iova
- * An array of IO addresses of each page composing the virtual area.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param free_cb
- * The callback used to free this chunk when destroying the mempool.
- * @param opaque
- * An opaque argument passed to free_cb.
- * @return
- * The number of objects added on success.
- * On error, the chunks are not added in the memory list of the
- * mempool and a negative errno is returned.
- */
-__rte_deprecated
-int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
-__rte_deprecated
-int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
/**
* Add virtually contiguous memory for objects in the pool at init
*
@@ -1746,13 +1614,6 @@ rte_mempool_virt2iova(const void *elt)
return hdr->iova;
}
-__rte_deprecated
-static inline phys_addr_t
-rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
-{
- return rte_mempool_virt2iova(elt);
-}
-
/**
* Check the consistency of mempool objects.
*
@@ -1822,68 +1683,6 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
struct rte_mempool_objsz *sz);
/**
- * @deprecated
- * Get the size of memory required to store mempool elements.
- *
- * Calculate the maximum amount of memory required to store given number
- * of objects. Assume that the memory buffer will be aligned at page
- * boundary.
- *
- * Note that if object size is bigger than page size, then it assumes
- * that pages are grouped in subsets of physically continuous pages big
- * enough to store at least one object.
- *
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param pg_shift
- * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
- * @param flags
- * The mempool flags.
- * @return
- * Required memory size aligned at page boundary.
- */
-__rte_deprecated
-size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift, unsigned int flags);
-
-/**
- * @deprecated
- * Get the size of memory required to store mempool elements.
- *
- * Calculate how much memory would be actually required with the given
- * memory footprint to store required number of objects.
- *
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param flags
- * The mempool flags.
- * @return
- * On success, the number of bytes needed to store given number of
- * objects, aligned to the given page size. If the provided memory
- * buffer is too small, return a negative value whose absolute value
- * is the actual number of elements that can be stored in that buffer.
- */
-__rte_deprecated
-ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags);
-
-/**
* Walk list of all memory pools
*
* @param func
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index fd63ca13..4e2bfc82 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -12,12 +12,31 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align)
{
size_t total_elt_sz;
+ size_t obj_per_page, pg_num, pg_sz;
size_t mem_size;
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-
- mem_size = rte_mempool_calc_mem_size_helper(obj_num, total_elt_sz,
- pg_shift);
+ if (total_elt_sz == 0) {
+ mem_size = 0;
+ } else if (pg_shift == 0) {
+ mem_size = total_elt_sz * obj_num;
+ } else {
+ pg_sz = (size_t)1 << pg_shift;
+ obj_per_page = pg_sz / total_elt_sz;
+ if (obj_per_page == 0) {
+ /*
+ * Note that if object size is bigger than page size,
+ * then it is assumed that pages are grouped in subsets
+ * of physically contiguous pages big enough to store
+ * at least one object.
+ */
+ mem_size =
+ RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num;
+ } else {
+ pg_num = (obj_num + obj_per_page - 1) / obj_per_page;
+ mem_size = pg_num << pg_shift;
+ }
+ }
*min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
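
A worked example of the inlined computation above, with illustrative figures rather than anything taken from the patch:

    /* pg_shift = 12 (4 KiB pages), total_elt_sz = 2500, obj_num = 1000:
     *   obj_per_page = 4096 / 2500        = 1
     *   pg_num       = (1000 + 1 - 1) / 1 = 1000
     *   mem_size     = 1000 << 12         = 4096000 bytes
     * i.e. one page per object, since two elements never fit in a page.
     */
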
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 7091b954..17cbca46 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -8,9 +8,6 @@ DPDK_2.0 {
rte_mempool_list_dump;
rte_mempool_lookup;
rte_mempool_walk;
- rte_mempool_xmem_create;
- rte_mempool_xmem_size;
- rte_mempool_xmem_usage;
local: *;
};
@@ -34,8 +31,6 @@ DPDK_16.07 {
rte_mempool_ops_table;
rte_mempool_populate_anon;
rte_mempool_populate_default;
- rte_mempool_populate_phys;
- rte_mempool_populate_phys_tab;
rte_mempool_populate_virt;
rte_mempool_register_ops;
rte_mempool_set_ops_byname;
@@ -46,7 +41,6 @@ DPDK_17.11 {
global:
rte_mempool_populate_iova;
- rte_mempool_populate_iova_tab;
} DPDK_16.07;
diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c
index 59af5ef2..473f69ab 100644
--- a/lib/librte_meter/rte_meter.c
+++ b/lib/librte_meter/rte_meter.c
@@ -30,7 +30,7 @@ rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_
}
}
-int __rte_experimental
+int
rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
struct rte_meter_srtcm_params *params)
{
@@ -68,7 +68,7 @@ rte_meter_srtcm_config(struct rte_meter_srtcm *m,
return 0;
}
-int __rte_experimental
+int
rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
struct rte_meter_trtcm_params *params)
{
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
index 03d80566..58a05158 100644
--- a/lib/librte_meter/rte_meter.h
+++ b/lib/librte_meter/rte_meter.h
@@ -20,7 +20,6 @@ extern "C" {
***/
#include <stdint.h>
-#include <rte_compat.h>
/*
* Application Programmer's Interface (API)
@@ -82,7 +81,7 @@ struct rte_meter_trtcm;
* @return
* 0 upon success, error code otherwise
*/
-int __rte_experimental
+int
rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
struct rte_meter_srtcm_params *params);
@@ -96,7 +95,7 @@ rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
* @return
* 0 upon success, error code otherwise
*/
-int __rte_experimental
+int
rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
struct rte_meter_trtcm_params *params);
diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map
index 9215d4cb..cb79f0c2 100644
--- a/lib/librte_meter/rte_meter_version.map
+++ b/lib/librte_meter/rte_meter_version.map
@@ -11,7 +11,7 @@ DPDK_2.0 {
local: *;
};
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_meter_srtcm_profile_config;
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index 258f0582..99a96b65 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -96,6 +96,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
/* Some sanity checks */
if (cnt_names < 1 || names == NULL)
return -EINVAL;
+ for (idx_name = 0; idx_name < cnt_names; idx_name++)
+ if (names[idx_name] == NULL)
+ return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
if (memzone == NULL)
@@ -159,6 +162,11 @@ rte_metrics_update_values(int port_id,
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
+
+ if (key >= stats->cnt_stats) {
+ rte_spinlock_unlock(&stats->lock);
+ return -EINVAL;
+ }
idx_metric = key;
cnt_setsize = 1;
while (idx_metric < stats->cnt_stats) {
@@ -200,9 +208,8 @@ rte_metrics_get_names(struct rte_metric_name *names,
int return_value;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
@@ -238,9 +245,9 @@ rte_metrics_get_values(int port_id,
return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
+
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 72dc2456..f2a8904a 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -108,25 +108,25 @@ __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
/* workaround gcc strict-aliasing warning */
uintptr_t ptr = (uintptr_t)buf;
typedef uint16_t __attribute__((__may_alias__)) u16_p;
- const u16_p *u16 = (const u16_p *)ptr;
-
- while (len >= (sizeof(*u16) * 4)) {
- sum += u16[0];
- sum += u16[1];
- sum += u16[2];
- sum += u16[3];
- len -= sizeof(*u16) * 4;
- u16 += 4;
+ const u16_p *u16_buf = (const u16_p *)ptr;
+
+ while (len >= (sizeof(*u16_buf) * 4)) {
+ sum += u16_buf[0];
+ sum += u16_buf[1];
+ sum += u16_buf[2];
+ sum += u16_buf[3];
+ len -= sizeof(*u16_buf) * 4;
+ u16_buf += 4;
}
- while (len >= sizeof(*u16)) {
- sum += *u16;
- len -= sizeof(*u16);
- u16 += 1;
+ while (len >= sizeof(*u16_buf)) {
+ sum += *u16_buf;
+ len -= sizeof(*u16_buf);
+ u16_buf += 1;
}
/* if length is in odd bytes */
if (len == 1)
- sum += *((const uint8_t *)u16);
+ sum += *((const uint8_t *)u16_buf);
return sum;
}
diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h
index 5e8b4ab5..ee638eef 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -48,7 +48,8 @@ enum workload {HIGH, MEDIUM, LOW};
enum policy_to_use {
TRAFFIC,
TIME,
- WORKLOAD
+ WORKLOAD,
+ BRANCH_RATIO
};
struct traffic {
diff --git a/lib/librte_power/power_acpi_cpufreq.c b/lib/librte_power/power_acpi_cpufreq.c
index bce933e9..cd5978d5 100644
--- a/lib/librte_power/power_acpi_cpufreq.c
+++ b/lib/librte_power/power_acpi_cpufreq.c
@@ -623,3 +623,24 @@ power_acpi_disable_turbo(unsigned int lcore_id)
return 0;
}
+
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+ if (caps == NULL) {
+ RTE_LOG(ERR, POWER, "Invalid argument\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+ caps->capabilities = 0;
+ caps->turbo = !!(pi->turbo_available);
+
+ return 0;
+}
diff --git a/lib/librte_power/power_acpi_cpufreq.h b/lib/librte_power/power_acpi_cpufreq.h
index edeeb27a..1af74160 100644
--- a/lib/librte_power/power_acpi_cpufreq.h
+++ b/lib/librte_power/power_acpi_cpufreq.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -196,6 +197,21 @@ int power_acpi_enable_turbo(unsigned int lcore_id);
*/
int power_acpi_disable_turbo(unsigned int lcore_id);
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/power_kvm_vm.c b/lib/librte_power/power_kvm_vm.c
index 38e9066f..20659b72 100644
--- a/lib/librte_power/power_kvm_vm.c
+++ b/lib/librte_power/power_kvm_vm.c
@@ -124,3 +124,11 @@ power_kvm_vm_disable_turbo(unsigned int lcore_id)
{
return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO);
}
+
+struct rte_power_core_capabilities;
+int power_kvm_vm_get_capabilities(__rte_unused unsigned int lcore_id,
+ __rte_unused struct rte_power_core_capabilities *caps)
+{
+ RTE_LOG(ERR, POWER, "rte_power_get_capabilities is not implemented for Virtual Machine Power Management\n");
+ return -ENOTSUP;
+}
diff --git a/lib/librte_power/power_kvm_vm.h b/lib/librte_power/power_kvm_vm.h
index 446d6997..94d4aa12 100644
--- a/lib/librte_power/power_kvm_vm.h
+++ b/lib/librte_power/power_kvm_vm.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -177,6 +178,22 @@ int power_kvm_vm_enable_turbo(unsigned int lcore_id);
* - Negative on error.
*/
int power_kvm_vm_disable_turbo(unsigned int lcore_id);
+
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_kvm_vm_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
index 6c8fb403..208b7919 100644
--- a/lib/librte_power/rte_power.c
+++ b/lib/librte_power/rte_power.c
@@ -24,6 +24,7 @@ rte_power_freq_change_t rte_power_freq_min = NULL;
rte_power_freq_change_t rte_power_turbo_status;
rte_power_freq_change_t rte_power_freq_enable_turbo;
rte_power_freq_change_t rte_power_freq_disable_turbo;
+rte_power_get_capabilities_t rte_power_get_capabilities;
int
rte_power_set_env(enum power_management_env env)
@@ -42,6 +43,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_acpi_turbo_status;
rte_power_freq_enable_turbo = power_acpi_enable_turbo;
rte_power_freq_disable_turbo = power_acpi_disable_turbo;
+ rte_power_get_capabilities = power_acpi_get_capabilities;
} else if (env == PM_ENV_KVM_VM) {
rte_power_freqs = power_kvm_vm_freqs;
rte_power_get_freq = power_kvm_vm_get_freq;
@@ -53,6 +55,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_kvm_vm_turbo_status;
rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo;
rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo;
+ rte_power_get_capabilities = power_kvm_vm_get_capabilities;
} else {
RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
env);
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
index b4b7357b..d70bc0b3 100644
--- a/lib/librte_power/rte_power.h
+++ b/lib/librte_power/rte_power.h
@@ -247,6 +247,38 @@ extern rte_power_freq_change_t rte_power_freq_enable_turbo;
*/
extern rte_power_freq_change_t rte_power_freq_disable_turbo;
+/**
+ * Power capabilities summary.
+ */
+struct rte_power_core_capabilities {
+ RTE_STD_C11
+ union {
+ uint64_t capabilities;
+ RTE_STD_C11
+ struct {
+ uint64_t turbo:1; /**< Turbo can be enabled. */
+ };
+ };
+};
+
+/**
+ * Returns power capabilities for a specific lcore.
+ * Function pointer definition. Review each environment's
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+typedef int (*rte_power_get_capabilities_t)(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
+extern rte_power_get_capabilities_t rte_power_get_capabilities;
#ifdef __cplusplus
}
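The query above dispatches through a per-environment function pointer bound by
rte_power_set_env(). A minimal usage sketch, assuming rte_power_init() has
already run for each lcore; the print loop itself is illustrative only:

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_power.h>

static void
print_turbo_caps(void)
{
	struct rte_power_core_capabilities caps;
	unsigned int lcore_id;

	RTE_LCORE_FOREACH(lcore_id) {
		/* Dispatches to the ACPI or KVM handler chosen at init. */
		if (rte_power_get_capabilities(lcore_id, &caps) < 0)
			continue;
		printf("lcore %u: turbo %savailable\n", lcore_id,
				caps.turbo ? "" : "not ");
	}
}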
diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map
index 96dc42ec..dd587dfb 100644
--- a/lib/librte_power/rte_power_version.map
+++ b/lib/librte_power/rte_power_version.map
@@ -25,4 +25,11 @@ DPDK_17.11 {
rte_power_freq_enable_turbo;
rte_power_turbo_status;
-} DPDK_2.0;
\ No newline at end of file
+} DPDK_2.0;
+
+DPDK_18.08 {
+ global:
+
+ rte_power_get_capabilities;
+
+} DPDK_17.11;
diff --git a/lib/librte_rawdev/Makefile b/lib/librte_rawdev/Makefile
index b9105b06..addb288d 100644
--- a/lib/librte_rawdev/Makefile
+++ b/lib/librte_rawdev/Makefile
@@ -10,7 +10,6 @@ LIB = librte_rawdev.a
LIBABIVER := 1
# build flags
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_eal
diff --git a/lib/librte_rawdev/meson.build b/lib/librte_rawdev/meson.build
index dcd37ad4..a20fbdc0 100644
--- a/lib/librte_rawdev/meson.build
+++ b/lib/librte_rawdev/meson.build
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2018 Intel Corporation
-allow_experimental_apis = true
sources = files('rte_rawdev.c')
headers = files('rte_rawdev.h', 'rte_rawdev_pmd.h')
diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c
index 284e6aec..62b6b97e 100644
--- a/lib/librte_rawdev/rte_rawdev.c
+++ b/lib/librte_rawdev/rte_rawdev.c
@@ -46,13 +46,13 @@ static struct rte_rawdev_global rawdev_globals = {
struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals;
/* Raw device, northbound API implementation */
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void)
{
return rte_rawdev_globals->nb_devs;
}
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name)
{
uint16_t i;
@@ -69,7 +69,7 @@ rte_rawdev_get_dev_id(const char *name)
return -ENODEV;
}
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -80,7 +80,7 @@ rte_rawdev_socket_id(uint16_t dev_id)
return dev->socket_id;
}
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
{
struct rte_rawdev *rawdev;
@@ -102,7 +102,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
return 0;
}
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
{
struct rte_rawdev *dev;
@@ -131,7 +131,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
return diag;
}
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -146,7 +146,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
return 0;
}
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -160,7 +160,7 @@ rte_rawdev_queue_setup(uint16_t dev_id,
return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf);
}
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
{
struct rte_rawdev *dev;
@@ -172,7 +172,19 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
return (*dev->dev_ops->queue_release)(dev, queue_id);
}
-int __rte_experimental
+uint16_t
+rte_rawdev_queue_count(uint16_t dev_id)
+{
+ struct rte_rawdev *dev;
+
+ RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_rawdevs[dev_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_count, -ENOTSUP);
+ return (*dev->dev_ops->queue_count)(dev);
+}
+
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value)
@@ -186,7 +198,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_get)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value)
@@ -200,7 +212,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_set)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -215,7 +227,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
return (*dev->dev_ops->enqueue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -230,7 +242,7 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id,
return (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f)
{
struct rte_rawdev *dev;
@@ -251,7 +263,7 @@ xstats_get_count(uint16_t dev_id)
return (*dev->dev_ops->xstats_get_names)(dev, NULL, 0);
}
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size)
@@ -274,7 +286,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
}
/* retrieve rawdev extended statistics */
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -287,7 +299,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get)(dev, ids, values, n);
}
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id)
@@ -306,7 +318,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get_by_name)(dev, name, id);
}
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[], uint32_t nb_ids)
{
@@ -317,7 +329,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
return (*dev->dev_ops->xstats_reset)(dev, ids, nb_ids);
}
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -327,7 +339,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
return (*dev->dev_ops->firmware_status_get)(dev, status_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -337,7 +349,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
return (*dev->dev_ops->firmware_version_get)(dev, version_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -350,7 +362,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
return (*dev->dev_ops->firmware_load)(dev, firmware_image);
}
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -360,7 +372,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id)
return (*dev->dev_ops->firmware_unload)(dev);
}
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -370,7 +382,7 @@ rte_rawdev_selftest(uint16_t dev_id)
return (*dev->dev_ops->dev_selftest)();
}
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -397,7 +409,7 @@ rte_rawdev_start(uint16_t dev_id)
return 0;
}
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -419,7 +431,7 @@ rte_rawdev_stop(uint16_t dev_id)
dev->started = 0;
}
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -438,7 +450,7 @@ rte_rawdev_close(uint16_t dev_id)
return (*dev->dev_ops->dev_close)(dev);
}
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -465,7 +477,7 @@ rte_rawdev_find_free_device_index(void)
return RTE_RAWDEV_MAX_DEVS;
}
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
{
struct rte_rawdev *rawdev;
@@ -506,7 +518,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
return rawdev;
}
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
{
int ret;
@@ -532,10 +544,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
return 0;
}
-RTE_INIT(librawdev_init_log);
-
-static void
-librawdev_init_log(void)
+RTE_INIT(librawdev_init_log)
{
librawdev_logtype = rte_log_register("lib.rawdev");
if (librawdev_logtype >= 0)
diff --git a/lib/librte_rawdev/rte_rawdev.h b/lib/librte_rawdev/rte_rawdev.h
index 2e14919b..684bfdb8 100644
--- a/lib/librte_rawdev/rte_rawdev.h
+++ b/lib/librte_rawdev/rte_rawdev.h
@@ -35,7 +35,7 @@ typedef void *rte_rawdev_obj_t;
* @return
* The total number of usable raw devices.
*/
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void);
/**
@@ -48,7 +48,7 @@ rte_rawdev_count(void);
* Returns raw device identifier on success.
* - <0: Failure to find named raw device.
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name);
/**
@@ -61,7 +61,7 @@ rte_rawdev_get_dev_id(const char *name);
* a default of zero if the socket could not be determined.
* -(-EINVAL) dev_id value is out of range.
*/
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id);
/**
@@ -84,7 +84,7 @@ struct rte_rawdev_info;
* - <0: Error code returned by the driver info get function.
*
*/
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
/**
@@ -111,7 +111,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
* - 0: Success, device configured.
* - <0: Error code returned by the driver configuration function.
*/
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
@@ -137,7 +137,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
* @see rte_raw_queue_setup()
*
*/
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -160,7 +160,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
* - 0: Success, raw queue correctly set up.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -180,8 +180,9 @@ rte_rawdev_queue_setup(uint16_t dev_id,
* - 0: Success, raw queue released.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
+
/**
* Get the number of raw queues on a specific raw device
*
@@ -190,7 +191,7 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
* @return
* - The number of configured raw queues
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_queue_count(uint16_t dev_id);
/**
@@ -208,7 +209,7 @@ rte_rawdev_queue_count(uint16_t dev_id);
* - 0: Success, device started.
* < 0: Failure
*/
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id);
/**
@@ -218,7 +219,7 @@ rte_rawdev_start(uint16_t dev_id);
* @param dev_id
* Raw device identifier.
*/
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id);
/**
@@ -232,7 +233,7 @@ rte_rawdev_stop(uint16_t dev_id);
* - <0 on failure to close device
* - (-EAGAIN) if device is busy
*/
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id);
/**
@@ -246,7 +247,7 @@ rte_rawdev_close(uint16_t dev_id);
* 0 for successful reset,
* !0 for failure in resetting
*/
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id);
#define RTE_RAWDEV_NAME_MAX_LEN (64)
@@ -316,7 +317,7 @@ struct rte_rawdev_buf {
* - 0: on success
* - <0: on failure.
*/
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f);
/**
@@ -338,7 +339,7 @@ rte_rawdev_dump(uint16_t dev_id, FILE *f);
* 0 for success
* !0 Error; attr_value remains untouched in case of error.
*/
-int __rte_experimental
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value);
@@ -357,7 +358,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
* 0 for success
* !0 Error
*/
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value);
@@ -383,7 +384,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
* Whether a partial enqueue counts as failure or success is defined
* between the application and the driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -414,7 +415,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
* Whether a partial dequeue counts as failure or success is defined
* between the application and the driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -454,7 +455,7 @@ struct rte_rawdev_xstats_name {
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size);
@@ -478,7 +479,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -500,7 +501,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
* - positive value or zero: the stat value
* - negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
*/
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id);
@@ -520,7 +521,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
* - zero: successfully reset the statistics to zero
* - negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[],
uint32_t nb_ids);
@@ -539,7 +540,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
* 0 for success,
* !0 for failure, `status_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id,
rte_rawdev_obj_t status_info);
@@ -557,7 +558,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id,
* 0 for success,
* !0 for failure, `version_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id,
rte_rawdev_obj_t version_info);
@@ -574,7 +575,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id,
* 0 for successful load
* !0 for failure to load the provided image, or if the image is invalid.
*/
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
/**
@@ -586,7 +587,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
* 0 for successful unload
* !0 for failure in unloading
*/
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id);
/**
@@ -599,7 +600,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id);
* - -ENOTSUP if the device doesn't support selftest
* - other values < 0 on failure.
*/
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id);
#ifdef __cplusplus
diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h
index 408adf0f..bb9bbc35 100644
--- a/lib/librte_rawdev/rte_rawdev_pmd.h
+++ b/lib/librte_rawdev/rte_rawdev_pmd.h
@@ -251,6 +251,24 @@ typedef int (*rawdev_queue_release_t)(struct rte_rawdev *dev,
uint16_t queue_id);
/**
+ * Get the number of queues configured on this device.
+ *
+ * The queue count could also be fetched from the device configuration, but
+ * that assumes the driver-managed configuration exposes this information.
+ *
+ * This function returns the supported queue count independently, which is
+ * useful when an iterator over the queues needs to be implemented.
+ *
+ * @param dev
+ * Raw device pointer
+ * @return
+ * Number of queues; 0 is assumed to be a valid response.
+ *
+ */
+typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev);
+
+/**
* Enqueue an array of raw buffers to the device.
*
* Buffer being used is opaque - it can be obtained from mempool or from
@@ -506,6 +524,8 @@ struct rte_rawdev_ops {
rawdev_queue_setup_t queue_setup;
/**< Release a raw queue. */
rawdev_queue_release_t queue_release;
+ /**< Get the number of queues attached to the device */
+ rawdev_queue_count_t queue_count;
/**< Enqueue an array of raw buffers to device. */
rawdev_enqueue_bufs_t enqueue_bufs;
@@ -556,7 +576,7 @@ struct rte_rawdev_ops {
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
int socket_id);
@@ -568,7 +588,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
/**
@@ -585,7 +605,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
* - Raw device pointer if device is successfully created.
* - NULL if device cannot be created.
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
int socket_id);
@@ -597,7 +617,7 @@ rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_uninit(const char *name);
#ifdef __cplusplus
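The new queue_count op slots into rte_rawdev_ops next to the existing queue
ops. A driver-side sketch, assuming a hypothetical skeleton_rawdev private
struct whose nb_queues field is maintained by the driver's queue_setup op:

#include <rte_rawdev_pmd.h>

struct skeleton_rawdev {
	uint16_t nb_queues;	/* updated by the queue_setup op */
};

static uint16_t
skeleton_queue_count(struct rte_rawdev *dev)
{
	struct skeleton_rawdev *skel = dev->dev_private;

	return skel->nb_queues;	/* 0 is a valid answer */
}

static const struct rte_rawdev_ops skeleton_ops = {
	.queue_count = skeleton_queue_count,
	/* ...remaining ops elided... */
};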
diff --git a/lib/librte_rawdev/rte_rawdev_version.map b/lib/librte_rawdev/rte_rawdev_version.map
index af4465e2..b61dbff1 100644
--- a/lib/librte_rawdev/rte_rawdev_version.map
+++ b/lib/librte_rawdev/rte_rawdev_version.map
@@ -1,4 +1,4 @@
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_rawdev_close;
@@ -16,6 +16,7 @@ EXPERIMENTAL {
rte_rawdev_pmd_allocate;
rte_rawdev_pmd_release;
rte_rawdev_queue_conf_get;
+ rte_rawdev_queue_count;
rte_rawdev_queue_setup;
rte_rawdev_queue_release;
rte_rawdev_reset;
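On the application side the symbol now lives in the stable DPDK_18.08 node. A
short usage sketch; the queue configuration object is driver-defined, so the
skel_queue_conf type below is a placeholder assumption:

#include <rte_rawdev.h>

struct skel_queue_conf { uint32_t depth; };	/* illustrative only */

static void
walk_queues(uint16_t dev_id)
{
	struct skel_queue_conf conf;
	uint16_t q, nb_queues = rte_rawdev_queue_count(dev_id);

	for (q = 0; q < nb_queues; q++)
		rte_rawdev_queue_conf_get(dev_id, q, &conf);
}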
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 12458225..7a731d07 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -26,8 +26,9 @@
* - Bulk dequeue.
* - Bulk enqueue.
*
- * Note: the ring implementation is not preemptable. A lcore must not
- * be interrupted by another task that uses the same ring.
+ * Note: the ring implementation is not preemptible. Refer to Programmer's
+ * guide/Environment Abstraction Layer/Multiple pthread/Known Issues/rte_ring
+ * for more information.
*
*/
@@ -382,7 +383,7 @@ __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
uint32_t cons_head, cons_next;
uint32_t entries;
- n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+ n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
&cons_head, &cons_next, &entries);
if (n == 0)
goto end;
diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h
index cb3f82b1..94df3c4a 100644
--- a/lib/librte_ring/rte_ring_c11_mem.h
+++ b/lib/librte_ring/rte_ring_c11_mem.h
@@ -66,14 +66,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
*old_head = __atomic_load_n(&r->prod.head,
__ATOMIC_ACQUIRE);
- const uint32_t cons_tail = r->cons.tail;
+
/*
* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* *old_head > cons_tail). So 'free_entries' is always between 0
* and capacity (which is < size).
*/
- *free_entries = (capacity + cons_tail - *old_head);
+ *free_entries = (capacity + r->cons.tail - *old_head);
/* check that we have enough room in ring */
if (unlikely(n > *free_entries))
@@ -133,13 +133,13 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
n = max;
*old_head = __atomic_load_n(&r->cons.head,
__ATOMIC_ACQUIRE);
- const uint32_t prod_tail = r->prod.tail;
+
/* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* cons_head > prod_tail). So 'entries' is always between 0
* and size(ring)-1.
*/
- *entries = (prod_tail - *old_head);
+ *entries = (r->prod.tail - *old_head);
/* Set the actual entries for dequeue */
if (n > *entries)
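The comments in these hunks lean on modulo-2^32 arithmetic: the difference of
two uint32_t values is well defined even across a wrap, so free_entries and
entries always land in [0, capacity]. A standalone worked example of the
wrapped case:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t capacity = 1024;
	uint32_t cons_tail = UINT32_MAX - 2;	/* tail just before wrap */
	uint32_t old_head = 5;			/* head already wrapped */

	/* head is 8 slots ahead of tail modulo 2^32 */
	uint32_t free_entries = capacity + cons_tail - old_head;

	printf("free_entries = %u\n", free_entries);	/* prints 1016 */
	return 0;
}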
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
index 1e559c99..1954960a 100644
--- a/lib/librte_security/rte_security.c
+++ b/lib/librte_security/rte_security.c
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#include <rte_malloc.h>
@@ -91,7 +63,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
struct rte_security_session *sess)
{
int ret;
- struct rte_mempool *mp = rte_mempool_from_obj(sess);
RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
@@ -100,7 +71,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
ret = instance->ops->session_destroy(instance->device, sess);
if (!ret)
- rte_mempool_put(mp, (void *)sess);
+ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess);
return ret;
}
diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h
index afa2861f..b0d1b97e 100644
--- a/lib/librte_security/rte_security.h
+++ b/lib/librte_security/rte_security.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_H_
diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h
index 0583f889..42f42ffe 100644
--- a/lib/librte_security/rte_security_driver.h
+++ b/lib/librte_security/rte_security_driver.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- * Copyright 2017 NXP.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_DRIVER_H_
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index c11ebcaa..c6354fef 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -303,6 +303,13 @@ out:
return vva;
}
+void
+vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+{
+ vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_pending_remove_all(vq);
+}
+
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
* The cache has already been initialized,
* just drop all cached and pending entries.
*/
- vhost_user_iotlb_cache_remove_all(vq);
- vhost_user_iotlb_pending_remove_all(vq);
+ vhost_user_iotlb_flush_all(vq);
}
#ifdef RTE_LIBRTE_VHOST_NUMA
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index e7083e37..60b9e4c5 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
uint8_t perm);
void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t size, uint8_t perm);
-
+void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 7f0cb9bc..b02673d4 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -58,6 +58,14 @@ extern "C" {
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#endif
+#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
+#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
+#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+#endif
+
/** Indicate whether protocol features negotiation is supported. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 0399c37b..d6303174 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -853,6 +853,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
+ /* Dequeue zero copy can't ensure descriptors are returned in order */
+ if (vsocket->dequeue_zero_copy) {
+ vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ }
+
if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index afded495..3c9be10a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -93,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy)
}
void
-free_vq(struct vhost_virtqueue *vq)
+free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- rte_free(vq->shadow_used_ring);
+ if (vq_is_packed(dev))
+ rte_free(vq->shadow_used_packed);
+ else
+ rte_free(vq->shadow_used_split);
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
@@ -110,19 +113,16 @@ free_device(struct virtio_net *dev)
uint32_t i;
for (i = 0; i < dev->nr_vring; i++)
- free_vq(dev->virtqueue[i]);
+ free_vq(dev, dev->virtqueue[i]);
rte_free(dev);
}
-int
-vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+static int
+vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint64_t req_size, size;
- if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
- goto out;
-
req_size = sizeof(struct vring_desc) * vq->size;
size = req_size;
vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
@@ -153,6 +153,55 @@ vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
if (!vq->used || size != req_size)
return -1;
+ return 0;
+}
+
+static int
+vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint64_t req_size, size;
+
+ req_size = sizeof(struct vring_packed_desc) * vq->size;
+ size = req_size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->desc_packed || size != req_size)
+ return -1;
+
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->driver_event || size != req_size)
+ return -1;
+
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->device_event || size != req_size)
+ return -1;
+
+ return 0;
+}
+
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+
+ if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ goto out;
+
+ if (vq_is_packed(dev)) {
+ if (vring_translate_packed(dev, vq) < 0)
+ return -1;
+ } else {
+ if (vring_translate_split(dev, vq) < 0)
+ return -1;
+ }
out:
vq->access_ok = 1;
@@ -234,6 +283,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
dev->virtqueue[vring_idx] = vq;
init_vring_queue(dev, vring_idx);
rte_spinlock_init(&vq->access_lock);
+ vq->avail_wrap_counter = 1;
+ vq->used_wrap_counter = 1;
+ vq->signalled_used_valid = false;
dev->nr_vring += 1;
@@ -268,21 +320,21 @@ vhost_new_device(void)
struct virtio_net *dev;
int i;
- dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
- if (dev == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to allocate memory for new dev.\n");
- return -1;
- }
-
for (i = 0; i < MAX_VHOST_DEVICE; i++) {
if (vhost_devices[i] == NULL)
break;
}
+
if (i == MAX_VHOST_DEVICE) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to find a free slot for new device.\n");
- rte_free(dev);
+ return -1;
+ }
+
+ dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+ if (dev == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to allocate memory for new dev.\n");
return -1;
}
@@ -291,10 +343,27 @@ vhost_new_device(void)
dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
dev->slave_req_fd = -1;
dev->vdpa_dev_id = -1;
+ rte_spinlock_init(&dev->slave_req_lock);
return i;
}
+void
+vhost_destroy_device_notify(struct virtio_net *dev)
+{
+ struct rte_vdpa_device *vdpa_dev;
+ int did;
+
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->dev_close)
+ vdpa_dev->ops->dev_close(dev->vid);
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+ dev->notify_ops->destroy_device(dev->vid);
+ }
+}
+
/*
* Invoked when the vhost-user connection is broken (when
* the virtio device is being detached).
@@ -303,20 +372,11 @@ void
vhost_destroy_device(int vid)
{
struct virtio_net *dev = get_device(vid);
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
if (dev == NULL)
return;
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 1);
free_device(dev);
@@ -346,6 +406,8 @@ vhost_detach_vdpa_device(int vid)
if (dev == NULL)
return;
+ vhost_user_host_notifier_ctrl(vid, false);
+
dev->vdpa_dev_id = -1;
}
@@ -558,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;
- vhost_vring_call(dev, vq);
+ if (vq_is_packed(dev))
+ vhost_vring_call_packed(dev, vq);
+ else
+ vhost_vring_call_split(dev, vq);
+
return 0;
}
@@ -579,19 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
}
+static inline void
+vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable)
+{
+ if (enable)
+ vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ else
+ vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+static inline void
+vhost_enable_notify_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq, int enable)
+{
+ uint16_t flags;
+
+ if (!enable)
+ vq->device_event->flags = VRING_EVENT_F_DISABLE;
+
+ flags = VRING_EVENT_F_ENABLE;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
+ flags = VRING_EVENT_F_DESC;
+ vq->device_event->off_wrap = vq->last_avail_idx |
+ vq->avail_wrap_counter << 15;
+ }
+
+ rte_smp_wmb();
+
+ vq->device_event->flags = flags;
+}
+
int
rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
struct virtio_net *dev = get_device(vid);
+ struct vhost_virtqueue *vq;
if (!dev)
return -1;
- if (enable)
- dev->virtqueue[queue_id]->used->flags &=
- ~VRING_USED_F_NO_NOTIFY;
+ vq = dev->virtqueue[queue_id];
+
+ if (vq_is_packed(dev))
+ vhost_enable_notify_packed(dev, vq, enable);
else
- dev->virtqueue[queue_id]->used->flags |= VRING_USED_F_NO_NOTIFY;
+ vhost_enable_notify_split(vq, enable);
+
return 0;
}
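vhost_enable_notify_packed() packs two fields into the 16-bit off_wrap word:
the low 15 bits carry the event descriptor index and bit 15 carries the wrap
counter, hence the << 15 above. A standalone encode/decode sketch of that
layout:

#include <stdbool.h>
#include <stdint.h>

static uint16_t
encode_off_wrap(uint16_t idx, bool wrap)
{
	/* idx is below the ring size (<= 32768), so it fits in 15 bits */
	return (uint16_t)((idx & 0x7fff) | ((uint16_t)wrap << 15));
}

static void
decode_off_wrap(uint16_t off_wrap, uint16_t *idx, bool *wrap)
{
	*idx = off_wrap & 0x7fff;
	*wrap = off_wrap >> 15;
}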
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 58c425a5..760a09c0 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -43,6 +43,7 @@
* from vring to do scatter RX.
*/
struct buf_vector {
+ uint64_t buf_iova;
uint64_t buf_addr;
uint32_t buf_len;
uint32_t desc_idx;
@@ -55,6 +56,7 @@ struct buf_vector {
struct zcopy_mbuf {
struct rte_mbuf *mbuf;
uint32_t desc_idx;
+ uint16_t desc_count;
uint16_t in_use;
TAILQ_ENTRY(zcopy_mbuf) next;
@@ -79,19 +81,35 @@ struct log_cache_entry {
unsigned long val;
};
+struct vring_used_elem_packed {
+ uint16_t id;
+ uint32_t len;
+ uint32_t count;
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
struct vhost_virtqueue {
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+ union {
+ struct vring_desc *desc;
+ struct vring_packed_desc *desc_packed;
+ };
+ union {
+ struct vring_avail *avail;
+ struct vring_packed_desc_event *driver_event;
+ };
+ union {
+ struct vring_used *used;
+ struct vring_packed_desc_event *device_event;
+ };
uint32_t size;
uint16_t last_avail_idx;
uint16_t last_used_idx;
/* Last used index we notify to front end. */
uint16_t signalled_used;
+ bool signalled_used_valid;
#define VIRTIO_INVALID_EVENTFD (-1)
#define VIRTIO_UNINITIALIZED_EVENTFD (-2)
@@ -115,12 +133,17 @@ struct vhost_virtqueue {
struct zcopy_mbuf *zmbufs;
struct zcopy_mbuf_list zmbuf_list;
- struct vring_used_elem *shadow_used_ring;
+ union {
+ struct vring_used_elem *shadow_used_split;
+ struct vring_used_elem_packed *shadow_used_packed;
+ };
uint16_t shadow_used_idx;
struct vhost_vring_addr ring_addrs;
struct batch_copy_elem *batch_copy_elems;
uint16_t batch_copy_nb_elems;
+ bool used_wrap_counter;
+ bool avail_wrap_counter;
struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
uint16_t log_cache_nb_elem;
@@ -191,6 +214,42 @@ struct vhost_msg {
#define VIRTIO_F_VERSION_1 32
#endif
+/* Declare packed ring related bits for older kernels */
+#ifndef VIRTIO_F_RING_PACKED
+
+#define VIRTIO_F_RING_PACKED 34
+
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
+#define VRING_DESC_F_AVAIL (1ULL << 7)
+#define VRING_DESC_F_USED (1ULL << 15)
+
+struct vring_packed_desc {
+ uint64_t addr;
+ uint32_t len;
+ uint16_t id;
+ uint16_t flags;
+};
+
+#define VRING_EVENT_F_ENABLE 0x0
+#define VRING_EVENT_F_DISABLE 0x1
+#define VRING_EVENT_F_DESC 0x2
+
+struct vring_packed_desc_event {
+ uint16_t off_wrap;
+ uint16_t flags;
+};
+#endif
+
+/*
+ * Available and used descs are in the same order
+ */
+#ifndef VIRTIO_F_IN_ORDER
+#define VIRTIO_F_IN_ORDER 35
+#endif
+
/* Features supported by this builtin vhost-user net driver. */
#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -214,7 +273,8 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
- (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_F_IN_ORDER) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -301,6 +361,7 @@ struct virtio_net {
struct guest_page *guest_pages;
int slave_req_fd;
+ rte_spinlock_t slave_req_lock;
/*
* Device id to identify a specific backend device.
@@ -314,6 +375,19 @@ struct virtio_net {
struct vhost_user_extern_ops extern_ops;
} __rte_cache_aligned;
+static __rte_always_inline bool
+vq_is_packed(struct virtio_net *dev)
+{
+ return dev->features & (1ull << VIRTIO_F_RING_PACKED);
+}
+
+static inline bool
+desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
+{
+ return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
+ wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+}
+
#define VHOST_LOG_PAGE 4096
/*
@@ -428,6 +502,7 @@ vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
vq->log_cache[i].offset = offset;
vq->log_cache[i].val = (1UL << bit_nr);
+ vq->log_cache_nb_elem++;
}
static __rte_always_inline void
@@ -535,9 +610,10 @@ int vhost_new_device(void);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int);
+void vhost_destroy_device_notify(struct virtio_net *dev);
void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
-void free_vq(struct vhost_virtqueue *vq);
+void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
@@ -588,10 +664,10 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
}
static __rte_always_inline void
-vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
+vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
/* Flush used->idx update before we read avail->flags. */
- rte_mb();
+ rte_smp_mb();
/* Don't kick guest if we don't reach index specified by guest. */
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
@@ -615,4 +691,55 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
}
}
+static __rte_always_inline void
+vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint16_t old, new, off, off_wrap;
+ bool signalled_used_valid, kick = false;
+
+ /* Flush used desc update. */
+ rte_smp_mb();
+
+ if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
+ if (vq->driver_event->flags !=
+ VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ old = vq->signalled_used;
+ new = vq->last_used_idx;
+ vq->signalled_used = new;
+ signalled_used_valid = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+
+ if (vq->driver_event->flags != VRING_EVENT_F_DESC) {
+ if (vq->driver_event->flags != VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ if (unlikely(!signalled_used_valid)) {
+ kick = true;
+ goto kick;
+ }
+
+ rte_smp_rmb();
+
+ off_wrap = vq->driver_event->off_wrap;
+ off = off_wrap & ~(1 << 15);
+
+ if (new <= old)
+ old -= vq->size;
+
+ if (vq->used_wrap_counter != off_wrap >> 15)
+ off -= vq->size;
+
+ if (vhost_need_event(off, new, old))
+ kick = true;
+kick:
+ if (kick)
+ eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
#endif /* _VHOST_NET_CDEV_H_ */
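The packed-ring paths above hinge on desc_is_avail(): a descriptor belongs to
the device when its AVAIL flag matches the ring's wrap counter while its USED
flag does not, which is why alloc_vring_queue() now starts both wrap counters
at 1. A minimal standalone check mirroring that predicate:

#include <stdbool.h>
#include <stdint.h>

#define F_AVAIL (1 << 7)
#define F_USED  (1 << 15)

static bool
is_avail(uint16_t flags, bool wrap)
{
	return wrap == !!(flags & F_AVAIL) && wrap != !!(flags & F_USED);
}

/*
 * First lap (wrap == true): the driver sets AVAIL and leaves USED clear,
 * so is_avail() returns true. Once the device writes the descriptor back
 * with AVAIL and USED both set for this lap, it reads as used again.
 */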
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
index f1650738..57341ef8 100644
--- a/lib/librte_vhost/vhost_crypto.c
+++ b/lib/librte_vhost/vhost_crypto.c
@@ -940,8 +940,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
struct vhost_virtqueue *vq, struct rte_crypto_op *op,
struct vring_desc *head, uint16_t desc_idx)
{
- struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(op->sym->m_src,
- sizeof(struct rte_mbuf));
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src);
struct rte_cryptodev_sym_session *session;
struct virtio_crypto_op_data_req *req, tmp_req;
struct virtio_crypto_inhdr *inhdr;
@@ -1062,8 +1061,7 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
{
struct rte_mbuf *m_src = op->sym->m_src;
struct rte_mbuf *m_dst = op->sym->m_dst;
- struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(m_src,
- sizeof(struct rte_mbuf));
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src);
uint16_t desc_idx;
int ret = 0;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 947290fc..a2d4c9ff 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -135,17 +135,7 @@ vhost_user_set_owner(void)
static int
vhost_user_reset_owner(struct virtio_net *dev)
{
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
-
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 0);
reset_device(dev);
@@ -243,7 +233,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
dev->virtqueue[dev->nr_vring] = NULL;
cleanup_vq(vq, 1);
- free_vq(vq);
+ free_vq(dev, vq);
}
}
@@ -292,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev,
TAILQ_INIT(&vq->zmbuf_list);
}
- vq->shadow_used_ring = rte_malloc(NULL,
+ if (vq_is_packed(dev)) {
+ vq->shadow_used_packed = rte_malloc(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->shadow_used_packed) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
+
+ } else {
+ vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
- if (!vq->shadow_used_ring) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to allocate memory for shadow used ring.\n");
- return -1;
+ if (!vq->shadow_used_split) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
}
vq->batch_copy_elems = rte_malloc(NULL,
@@ -325,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index)
struct virtio_net *old_dev;
struct vhost_virtqueue *old_vq, *vq;
struct zcopy_mbuf *new_zmbuf;
- struct vring_used_elem *new_shadow_used_ring;
+ struct vring_used_elem *new_shadow_used_split;
+ struct vring_used_elem_packed *new_shadow_used_packed;
struct batch_copy_elem *new_batch_copy_elems;
int ret;
@@ -360,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index)
vq->zmbufs = new_zmbuf;
}
- new_shadow_used_ring = rte_malloc_socket(NULL,
- vq->size * sizeof(struct vring_used_elem),
- RTE_CACHE_LINE_SIZE,
- newnode);
- if (new_shadow_used_ring) {
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = new_shadow_used_ring;
+ if (vq_is_packed(dev)) {
+ new_shadow_used_packed = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_packed) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = new_shadow_used_packed;
+ }
+ } else {
+ new_shadow_used_split = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_split) {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = new_shadow_used_split;
+ }
}
new_batch_copy_elems = rte_malloc_socket(NULL,
@@ -477,6 +494,51 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
struct vhost_vring_addr *addr = &vq->ring_addrs;
uint64_t len;
+ if (vq_is_packed(dev)) {
+ len = sizeof(struct vring_packed_desc) * vq->size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len);
+ vq->log_guest_addr = 0;
+ if (vq->desc_packed == NULL ||
+ len != sizeof(struct vring_packed_desc) *
+ vq->size) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to map desc_packed ring.\n",
+ dev->vid);
+ return dev;
+ }
+
+ dev = numa_realloc(dev, vq_index);
+ vq = dev->virtqueue[vq_index];
+ addr = &vq->ring_addrs;
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->driver_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->avail_user_addr, &len);
+ if (vq->driver_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find driver area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->device_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->used_user_addr, &len);
+ if (vq->device_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find device area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ return dev;
+ }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -751,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
dev->mem = NULL;
}
+ /* Flush IOTLB cache as previous HVAs are now invalid */
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ for (i = 0; i < dev->nr_vring; i++)
+ vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+
dev->nr_guest_pages = 0;
if (!dev->guest_pages) {
dev->max_guest_pages = 8;
@@ -885,10 +952,20 @@ err_mmap:
return -1;
}
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
+static bool
+vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- return vq && vq->desc && vq->avail && vq->used &&
+ bool rings_ok;
+
+ if (!vq)
+ return false;
+
+ if (vq_is_packed(dev))
+ rings_ok = !!vq->desc_packed;
+ else
+ rings_ok = vq->desc && vq->avail && vq->used;
+
+ return rings_ok &&
vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
}
@@ -905,7 +982,7 @@ virtio_is_ready(struct virtio_net *dev)
for (i = 0; i < dev->nr_vring; i++) {
vq = dev->virtqueue[i];
- if (!vq_is_ready(vq))
+ if (!vq_is_ready(dev, vq))
return 0;
}
@@ -996,18 +1073,9 @@ vhost_user_get_vring_base(struct virtio_net *dev,
VhostUserMsg *msg)
{
struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
/* We have to stop the queue (virtio) if it is running. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
dev->flags &= ~VIRTIO_DEV_READY;
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
@@ -1035,8 +1103,13 @@ vhost_user_get_vring_base(struct virtio_net *dev,
if (dev->dequeue_zero_copy)
free_zmbufs(vq);
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = NULL;
+ if (vq_is_packed(dev)) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = NULL;
+ } else {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = NULL;
+ }
rte_free(vq->batch_copy_elems);
vq->batch_copy_elems = NULL;
@@ -1384,6 +1457,22 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
return send_vhost_message(sockfd, msg, NULL, 0);
}
+static int
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
+ int *fds, int fd_num)
+{
+ int ret;
+
+ if (msg->flags & VHOST_USER_NEED_REPLY)
+ rte_spinlock_lock(&dev->slave_req_lock);
+
+ ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num);
+ if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
+ rte_spinlock_unlock(&dev->slave_req_lock);
+
+ return ret;
+}
+
/*
* Allocate a queue pair if it hasn't been allocated yet
*/
@@ -1705,11 +1794,45 @@ skip_to_reply:
if (vdpa_dev->ops->dev_conf)
vdpa_dev->ops->dev_conf(dev->vid);
dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+ if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "(%d) software relay is used for vDPA, performance may be low.\n",
+ dev->vid);
+ }
}
return 0;
}
+static int process_slave_message_reply(struct virtio_net *dev,
+ const VhostUserMsg *msg)
+{
+ VhostUserMsg msg_reply;
+ int ret;
+
+ if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
+ return 0;
+
+ if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (msg_reply.request.slave != msg->request.slave) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Received unexpected msg type (%u), expected %u\n",
+ msg_reply.request.slave, msg->request.slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = msg_reply.payload.u64 ? -1 : 0;
+
+out:
+ rte_spinlock_unlock(&dev->slave_req_lock);
+ return ret;
+}
+
int
vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
@@ -1735,3 +1858,101 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
return 0;
}
+
+static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
+ int index, int fd,
+ uint64_t offset,
+ uint64_t size)
+{
+ int *fdp = NULL;
+ size_t fd_num = 0;
+ int ret;
+ struct VhostUserMsg msg = {
+ .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
+ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
+ .size = sizeof(msg.payload.area),
+ .payload.area = {
+ .u64 = index & VHOST_USER_VRING_IDX_MASK,
+ .size = size,
+ .offset = offset,
+ },
+ };
+
+ if (fd < 0)
+ msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
+ else {
+ fdp = &fd;
+ fd_num = 1;
+ }
+
+ ret = send_vhost_slave_message(dev, &msg, fdp, fd_num);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to set host notifier (%d)\n", ret);
+ return ret;
+ }
+
+ return process_slave_message_reply(dev, &msg);
+}
+
+int vhost_user_host_notifier_ctrl(int vid, bool enable)
+{
+ struct virtio_net *dev;
+ struct rte_vdpa_device *vdpa_dev;
+ int vfio_device_fd, did, ret = 0;
+ uint64_t offset, size;
+ unsigned int i;
+
+ dev = get_device(vid);
+ if (!dev)
+ return -ENODEV;
+
+ did = dev->vdpa_dev_id;
+ if (did < 0)
+ return -EINVAL;
+
+ if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
+ !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)))
+ return -ENOTSUP;
+
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev)
+ return -ENODEV;
+
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP);
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP);
+
+ vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid);
+ if (vfio_device_fd < 0)
+ return -ENOTSUP;
+
+ if (enable) {
+ for (i = 0; i < dev->nr_vring; i++) {
+ if (vdpa_dev->ops->get_notify_area(vid, i, &offset,
+ &size) < 0) {
+ ret = -ENOTSUP;
+ goto disable;
+ }
+
+ if (vhost_user_slave_set_vring_host_notifier(dev, i,
+ vfio_device_fd, offset, size) < 0) {
+ ret = -EFAULT;
+ goto disable;
+ }
+ }
+ } else {
+disable:
+ for (i = 0; i < dev->nr_vring; i++) {
+ vhost_user_slave_set_vring_host_notifier(dev, i, -1,
+ 0, 0);
+ }
+ }
+
+ return ret;
+}
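vhost_user_host_notifier_ctrl() can only hand a notify area to the master if
the vDPA driver implements the get_vfio_device_fd and get_notify_area ops it
calls above. A hedged driver-side sketch; the static fd, the fixed BAR offset,
and the page-per-queue layout are illustrative assumptions, not a real PMD:

#include <stdint.h>
#include <unistd.h>
#include <rte_common.h>
#include <rte_vdpa.h>

static int vfio_dev_fd = -1;		/* set when the PMD opens VFIO */
#define NOTIFY_BAR_OFFSET 0x3000	/* illustrative doorbell offset */

static int
my_get_vfio_device_fd(int vid __rte_unused)
{
	return vfio_dev_fd;
}

static int
my_get_notify_area(int vid __rte_unused, int qid,
		uint64_t *offset, uint64_t *size)
{
	/* assume one doorbell page per queue, mmap-able by the master */
	*offset = NOTIFY_BAR_OFFSET + (uint64_t)qid * getpagesize();
	*size = getpagesize();
	return 0;
}

static struct rte_vdpa_dev_ops my_vdpa_ops = {
	.get_vfio_device_fd = my_get_vfio_device_fd,
	.get_notify_area = my_get_notify_area,
	/* ...remaining ops elided... */
};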
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 1ad5cf46..42166adf 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -20,7 +20,9 @@
(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION))
+ (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@@ -54,6 +56,7 @@ typedef enum VhostUserRequest {
typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_NONE = 0,
VHOST_USER_SLAVE_IOTLB_MSG = 1,
+ VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -99,6 +102,12 @@ typedef struct VhostUserCryptoSessionParam {
uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
} VhostUserCryptoSessionParam;
+typedef struct VhostUserVringArea {
+ uint64_t u64;
+ uint64_t size;
+ uint64_t offset;
+} VhostUserVringArea;
+
typedef struct VhostUserMsg {
union {
uint32_t master; /* a VhostUserRequest value */
@@ -120,6 +129,7 @@ typedef struct VhostUserMsg {
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
VhostUserCryptoSessionParam crypto_session;
+ VhostUserVringArea area;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
@@ -133,6 +143,7 @@ typedef struct VhostUserMsg {
/* vhost_user.c */
int vhost_user_msg_handler(int vid, int fd);
int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
+int vhost_user_host_notifier_ctrl(int vid, bool enable);
/* socket.c */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 76ec5f08..99c7afc8 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -25,22 +25,27 @@
#define MAX_BATCH_LEN 256
+static __rte_always_inline bool
+rxvq_is_mergeable(struct virtio_net *dev)
+{
+ return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
+}
+
static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
-static __rte_always_inline struct vring_desc *
+static __rte_always_inline void *
alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *desc)
+ uint64_t desc_addr, uint64_t desc_len)
{
- struct vring_desc *idesc;
+ void *idesc;
uint64_t src, dst;
- uint64_t len, remain = desc->len;
- uint64_t desc_addr = desc->addr;
+ uint64_t len, remain = desc_len;
- idesc = rte_malloc(__func__, desc->len, 0);
+ idesc = rte_malloc(__func__, desc_len, 0);
if (unlikely(!idesc))
return 0;
@@ -66,17 +71,18 @@ alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
static __rte_always_inline void
-free_ind_table(struct vring_desc *idesc)
+free_ind_table(void *idesc)
{
rte_free(idesc);
}
static __rte_always_inline void
-do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint16_t to, uint16_t from, uint16_t size)
+do_flush_shadow_used_ring_split(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint16_t to, uint16_t from, uint16_t size)
{
rte_memcpy(&vq->used->ring[to],
- &vq->shadow_used_ring[from],
+ &vq->shadow_used_split[from],
size * sizeof(struct vring_used_elem));
vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[to]),
@@ -84,22 +90,22 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
static __rte_always_inline void
-flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
if (used_idx + vq->shadow_used_idx <= vq->size) {
- do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
vq->shadow_used_idx);
} else {
uint16_t size;
/* update used ring interval [used_idx, vq->size] */
size = vq->size - used_idx;
- do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
/* update the left half used ring interval [0, left_size] */
- do_flush_shadow_used_ring(dev, vq, 0, size,
+ do_flush_shadow_used_ring_split(dev, vq, 0, size,
vq->shadow_used_idx - size);
}
vq->last_used_idx += vq->shadow_used_idx;
@@ -109,18 +115,84 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_sync(dev, vq);
*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+ vq->shadow_used_idx = 0;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
}
static __rte_always_inline void
-update_shadow_used_ring(struct vhost_virtqueue *vq,
+update_shadow_used_ring_split(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint16_t len)
{
uint16_t i = vq->shadow_used_idx++;
- vq->shadow_used_ring[i].id = desc_idx;
- vq->shadow_used_ring[i].len = len;
+ vq->shadow_used_split[i].id = desc_idx;
+ vq->shadow_used_split[i].len = len;
+}
+
+static __rte_always_inline void
+flush_shadow_used_ring_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq)
+{
+ int i;
+ uint16_t used_idx = vq->last_used_idx;
+
+ /* Split loop in two to save memory barriers */
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
+ vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
+
+ used_idx += vq->shadow_used_packed[i].count;
+ if (used_idx >= vq->size)
+ used_idx -= vq->size;
+ }
+
+ rte_smp_wmb();
+
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ uint16_t flags;
+
+ if (vq->shadow_used_packed[i].len)
+ flags = VRING_DESC_F_WRITE;
+ else
+ flags = 0;
+
+ if (vq->used_wrap_counter) {
+ flags |= VRING_DESC_F_USED;
+ flags |= VRING_DESC_F_AVAIL;
+ } else {
+ flags &= ~VRING_DESC_F_USED;
+ flags &= ~VRING_DESC_F_AVAIL;
+ }
+
+ vq->desc_packed[vq->last_used_idx].flags = flags;
+
+ vhost_log_cache_used_vring(dev, vq,
+ vq->last_used_idx *
+ sizeof(struct vring_packed_desc),
+ sizeof(struct vring_packed_desc));
+
+ vq->last_used_idx += vq->shadow_used_packed[i].count;
+ if (vq->last_used_idx >= vq->size) {
+ vq->used_wrap_counter ^= 1;
+ vq->last_used_idx -= vq->size;
+ }
+ }
+
+ rte_smp_wmb();
+ vq->shadow_used_idx = 0;
+ vhost_log_cache_sync(dev, vq);
+}
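
The flags written in the second loop follow the packed-ring ownership convention: a descriptor counts as available when its AVAIL bit matches the current wrap counter and its USED bit does not. A self-contained sketch of that test, with bit positions taken from the virtio 1.1 spec (the library's own helper may differ in name):

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_DESC_F_AVAIL	(1 << 7)
#define EXAMPLE_DESC_F_USED	(1 << 15)

/* Available to the device: AVAIL agrees with the wrap counter,
 * USED disagrees with it. */
static inline bool
example_desc_is_avail(uint16_t flags, bool wrap_counter)
{
	bool avail = !!(flags & EXAMPLE_DESC_F_AVAIL);
	bool used = !!(flags & EXAMPLE_DESC_F_USED);

	return avail == wrap_counter && used != wrap_counter;
}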
+
+static __rte_always_inline void
+update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+ uint16_t desc_idx, uint16_t len, uint16_t count)
+{
+ uint16_t i = vq->shadow_used_idx++;
+
+ vq->shadow_used_packed[i].id = desc_idx;
+ vq->shadow_used_packed[i].len = len;
+ vq->shadow_used_packed[i].count = count;
}
static inline void
@@ -135,6 +207,8 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
+
+ vq->batch_copy_nb_elems = 0;
}
static inline void
@@ -146,6 +220,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
for (i = 0; i < count; i++)
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+
+ vq->batch_copy_nb_elems = 0;
}
/* avoid the write operation when possible, to lessen cache issues */
@@ -154,7 +230,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
(var) = (val); \
} while (0)
-static void
+static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
@@ -216,324 +292,47 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
static __rte_always_inline int
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, struct rte_mbuf *m,
- uint16_t desc_idx, uint32_t size)
+map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
{
- uint32_t desc_avail, desc_offset;
- uint32_t mbuf_avail, mbuf_offset;
- uint32_t cpy_len;
- uint64_t desc_chunck_len;
- struct vring_desc *desc;
- uint64_t desc_addr, desc_gaddr;
- /* A counter to avoid desc dead loop chain */
- uint16_t nr_desc = 1;
- struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
- int error = 0;
+ uint16_t vec_id = *vec_idx;
- desc = &descs[desc_idx];
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- /*
- * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
- * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
- * otherwise stores offset on the stack instead of in a register.
- */
- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
- error = -1;
- goto out;
- }
+ while (desc_len) {
+ uint64_t desc_addr;
+ uint64_t desc_chunck_len = desc_len;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
- if (likely(desc_chunck_len >= dev->vhost_hlen)) {
- virtio_enqueue_offload(m,
- (struct virtio_net_hdr *)(uintptr_t)desc_addr);
- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
- } else {
- struct virtio_net_hdr vnet_hdr;
- uint64_t remain = dev->vhost_hlen;
- uint64_t len;
- uint64_t src = (uint64_t)(uintptr_t)&vnet_hdr, dst;
- uint64_t guest_addr = desc_gaddr;
-
- virtio_enqueue_offload(m, &vnet_hdr);
-
- while (remain) {
- len = remain;
- dst = vhost_iova_to_vva(dev, vq, guest_addr,
- &len, VHOST_ACCESS_RW);
- if (unlikely(!dst || !len)) {
- error = -1;
- goto out;
- }
-
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src, len);
-
- PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
- vhost_log_cache_write(dev, vq, guest_addr, len);
- remain -= len;
- guest_addr += len;
- src += len;
- }
- }
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
+ return -1;
- desc_avail = desc->len - dev->vhost_hlen;
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr = desc->addr + dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
+ desc_addr = vhost_iova_to_vva(dev, vq,
+ desc_iova,
&desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- } else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
- }
-
- mbuf_avail = rte_pktmbuf_data_len(m);
- mbuf_offset = 0;
- while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current mbuf, fetch next */
- if (mbuf_avail == 0) {
- m = m->next;
-
- mbuf_offset = 0;
- mbuf_avail = rte_pktmbuf_data_len(m);
- }
-
- /* done with current desc buf, fetch next */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
- /* Room in vring buffer is not enough */
- error = -1;
- goto out;
- }
- if (unlikely(desc->next >= size || ++nr_desc > size)) {
- error = -1;
- goto out;
- }
-
- desc = &descs[desc->next];
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- desc_avail = desc->len;
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
- }
-
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
- cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
- } else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
- }
-
- mbuf_avail -= cpy_len;
- mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
- desc_chunck_len -= cpy_len;
- }
-
-out:
- vq->batch_copy_nb_elems = copy_nb;
-
- return error;
-}
-
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are successfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint32_t count)
-{
- struct vhost_virtqueue *vq;
- uint16_t avail_idx, free_entries, start_idx;
- uint16_t desc_indexes[MAX_PKT_BURST];
- struct vring_desc *descs;
- uint16_t used_idx;
- uint32_t i, sz;
-
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
-
- vq = dev->virtqueue[queue_id];
-
- rte_spinlock_lock(&vq->access_lock);
-
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
-
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
-
- if (unlikely(vq->access_ok == 0)) {
- if (unlikely(vring_translate(dev, vq) < 0)) {
- count = 0;
- goto out;
- }
- }
-
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- start_idx = vq->last_used_idx;
- free_entries = avail_idx - start_idx;
- count = RTE_MIN(count, free_entries);
- count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
- if (count == 0)
- goto out;
-
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
- dev->vid, start_idx, start_idx + count);
-
- vq->batch_copy_nb_elems = 0;
-
- /* Retrieve all of the desc indexes first to avoid caching issues. */
- rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
- for (i = 0; i < count; i++) {
- used_idx = (start_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[used_idx];
- vq->used->ring[used_idx].id = desc_indexes[i];
- vq->used->ring[used_idx].len = pkts[i]->pkt_len +
- dev->vhost_hlen;
- vhost_log_cache_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
- }
-
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
- for (i = 0; i < count; i++) {
- struct vring_desc *idesc = NULL;
- uint16_t desc_idx = desc_indexes[i];
- int err;
-
- if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
- uint64_t dlen = vq->desc[desc_idx].len;
- descs = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev,
- vq, vq->desc[desc_idx].addr,
- &dlen, VHOST_ACCESS_RO);
- if (unlikely(!descs)) {
- count = i;
- break;
- }
-
- if (unlikely(dlen < vq->desc[desc_idx].len)) {
- /*
- * The indirect desc table is not contiguous
- * in process VA space, we have to copy it.
- */
- idesc = alloc_copy_ind_table(dev, vq,
- &vq->desc[desc_idx]);
- if (unlikely(!idesc))
- break;
-
- descs = idesc;
- }
-
- desc_idx = 0;
- sz = vq->desc[desc_idx].len / sizeof(*descs);
- } else {
- descs = vq->desc;
- sz = vq->size;
- }
-
- err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
- if (unlikely(err)) {
- count = i;
- free_ind_table(idesc);
- break;
- }
+ perm);
+ if (unlikely(!desc_addr))
+ return -1;
- if (i + 1 < count)
- rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+ buf_vec[vec_id].buf_iova = desc_iova;
+ buf_vec[vec_id].buf_addr = desc_addr;
+ buf_vec[vec_id].buf_len = desc_chunck_len;
- if (unlikely(!!idesc))
- free_ind_table(idesc);
+ desc_len -= desc_chunck_len;
+ desc_iova += desc_chunck_len;
+ vec_id++;
}
+ *vec_idx = vec_id;
- do_data_copy_enqueue(dev, vq);
-
- rte_smp_wmb();
-
- vhost_log_cache_sync(dev, vq);
-
- *(volatile uint16_t *)&vq->used->idx += count;
- vq->last_used_idx += count;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
-
- vhost_vring_call(dev, vq);
-out:
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_unlock(vq);
-
-out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
-
- return count;
+ return 0;
}
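
Note that the loop above may emit several buf_vec entries for a single guest descriptor: vhost_iova_to_vva() can shrink desc_chunck_len to the largest host-contiguous mapping, so the remainder is translated in further iterations. An illustrative caller, with hypothetical IOVA and length values:

/* Sketch only; relies on the types and helpers in this file. */
static int
example_translate_one_desc(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct buf_vector *vec)
{
	uint16_t nvec = 0;

	/* 0x10000/8192 are made-up guest IOVA and length values. */
	if (map_one_desc(dev, vq, vec, &nvec, 0x10000, 8192,
			 VHOST_ACCESS_RO) < 0)
		return -1;

	/* nvec >= 1; each vec[i] is contiguous in host VA space. */
	return nvec;
}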
static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t avail_idx, uint32_t *vec_idx,
+fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t avail_idx, uint16_t *vec_idx,
struct buf_vector *buf_vec, uint16_t *desc_chain_head,
- uint16_t *desc_chain_len)
+ uint16_t *desc_chain_len, uint8_t perm)
{
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
- uint32_t vec_id = *vec_idx;
+ uint16_t vec_id = *vec_idx;
uint32_t len = 0;
uint64_t dlen;
struct vring_desc *descs = vq->desc;
@@ -555,7 +354,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* The indirect desc table is not contiguous
* in the process VA space, so we have to copy it.
*/
- idesc = alloc_copy_ind_table(dev, vq, &vq->desc[idx]);
+ idesc = alloc_copy_ind_table(dev, vq,
+ vq->desc[idx].addr, vq->desc[idx].len);
if (unlikely(!idesc))
return -1;
@@ -566,16 +366,19 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
while (1) {
- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+ if (unlikely(idx >= vq->size)) {
free_ind_table(idesc);
return -1;
}
len += descs[idx].len;
- buf_vec[vec_id].buf_addr = descs[idx].addr;
- buf_vec[vec_id].buf_len = descs[idx].len;
- buf_vec[vec_id].desc_idx = idx;
- vec_id++;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[idx].addr, descs[idx].len,
+ perm))) {
+ free_ind_table(idesc);
+ return -1;
+ }
if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
break;
@@ -596,13 +399,14 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* Returns -1 on fail, 0 on success
*/
static inline int
-reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t size, struct buf_vector *buf_vec,
- uint16_t *num_buffers, uint16_t avail_head)
+ uint16_t *num_buffers, uint16_t avail_head,
+ uint16_t *nr_vec)
{
uint16_t cur_idx;
- uint32_t vec_idx = 0;
- uint16_t tries = 0;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
uint16_t head_idx = 0;
uint16_t len = 0;
@@ -610,49 +414,223 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
*num_buffers = 0;
cur_idx = vq->last_avail_idx;
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
+
while (size > 0) {
if (unlikely(cur_idx == avail_head))
return -1;
+ /*
+ * If we have tried all available ring items and still
+ * can't get enough buffers, something abnormal has
+ * happened.
+ */
+ if (unlikely(++tries > max_tries))
+ return -1;
- if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
- &head_idx, &len) < 0))
+ if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
+ &vec_idx, buf_vec,
+ &head_idx, &len,
+ VHOST_ACCESS_RW) < 0))
return -1;
len = RTE_MIN(len, size);
- update_shadow_used_ring(vq, head_idx, len);
+ update_shadow_used_ring_split(vq, head_idx, len);
size -= len;
cur_idx++;
- tries++;
*num_buffers += 1;
+ }
+
+ *nr_vec = vec_idx;
+
+ return 0;
+}
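
The max_tries split reflects the feature difference: with VIRTIO_NET_F_MRG_RXBUF one packet may be scattered over several descriptor chains, with the chain count reported back through the virtio-net header, while without it everything must fit into a single chain. For reference, a sketch of the mergeable header layout as described by the virtio spec (the real struct ships with the virtio headers):

#include <stdint.h>

struct example_virtio_net_hdr_mrg_rxbuf {
	uint8_t flags;
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
	uint16_t num_buffers;	/* receives *num_buffers on mergeable RX */
};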
+
+static __rte_always_inline int
+fill_vec_buf_packed_indirect(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct vring_packed_desc *desc, uint16_t *vec_idx,
+ struct buf_vector *buf_vec, uint16_t *len, uint8_t perm)
+{
+ uint16_t i;
+ uint32_t nr_descs;
+ uint16_t vec_id = *vec_idx;
+ uint64_t dlen;
+ struct vring_packed_desc *descs, *idescs = NULL;
+
+ dlen = desc->len;
+ descs = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
+ if (unlikely(!descs))
+ return -1;
+
+ if (unlikely(dlen < desc->len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in the process VA space, so we have to copy it.
+ */
+ idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
+ if (unlikely(!idescs))
+ return -1;
+
+ descs = idescs;
+ }
+
+ nr_descs = desc->len / sizeof(struct vring_packed_desc);
+ if (unlikely(nr_descs >= vq->size)) {
+ free_ind_table(idescs);
+ return -1;
+ }
+
+ for (i = 0; i < nr_descs; i++) {
+ if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+ free_ind_table(idescs);
+ return -1;
+ }
+
+ *len += descs[i].len;
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[i].addr, descs[i].len,
+ perm)))
+ return -1;
+ }
+ *vec_idx = vec_id;
+
+ if (unlikely(!!idescs))
+ free_ind_table(idescs);
+
+ return 0;
+}
+
+static __rte_always_inline int
+fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint16_t avail_idx, uint16_t *desc_count,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint16_t *buf_id, uint16_t *len, uint8_t perm)
+{
+ bool wrap_counter = vq->avail_wrap_counter;
+ struct vring_packed_desc *descs = vq->desc_packed;
+ uint16_t vec_id = *vec_idx;
+
+ if (avail_idx < vq->last_avail_idx)
+ wrap_counter ^= 1;
+
+ if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
+ return -1;
+
+ *desc_count = 0;
+
+ while (1) {
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
+ return -1;
+
+ *desc_count += 1;
+ *buf_id = descs[avail_idx].id;
+
+ if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
+ if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
+ &descs[avail_idx],
+ &vec_id, buf_vec,
+ len, perm) < 0))
+ return -1;
+ } else {
+ *len += descs[avail_idx].len;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[avail_idx].addr,
+ descs[avail_idx].len,
+ perm)))
+ return -1;
+ }
+
+ if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
+ break;
+
+ if (++avail_idx >= vq->size) {
+ avail_idx -= vq->size;
+ wrap_counter ^= 1;
+ }
+ }
+
+ *vec_idx = vec_id;
+
+ return 0;
+}
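
One subtlety above: the caller may pass an avail_idx that already wrapped past the ring end relative to last_avail_idx, in which case the effective wrap counter for the availability test must be flipped. A minimal sketch of that adjustment:

#include <stdbool.h>
#include <stdint.h>

/* Indices stay in [0, ring_size); laps are tracked by a 1-bit wrap
 * counter, so an index numerically behind last_avail_idx implies one
 * extra lap. */
static inline bool
example_effective_wrap(uint16_t avail_idx, uint16_t last_avail_idx,
		       bool wrap_counter)
{
	return avail_idx < last_avail_idx ? !wrap_counter : wrap_counter;
}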
+
+/*
+ * Returns -1 on fail, 0 on success
+ */
+static inline int
+reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t size, struct buf_vector *buf_vec,
+ uint16_t *nr_vec, uint16_t *num_buffers,
+ uint16_t *nr_descs)
+{
+ uint16_t avail_idx;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
+
+ uint16_t buf_id = 0;
+ uint16_t len = 0;
+ uint16_t desc_count;
+
+ *num_buffers = 0;
+ avail_idx = vq->last_avail_idx;
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
+
+ while (size > 0) {
/*
* If we have tried all available ring items and still
* can't get enough buffers, something abnormal has
* happened.
*/
- if (unlikely(tries >= vq->size))
+ if (unlikely(++tries > max_tries))
return -1;
+
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ avail_idx, &desc_count,
+ buf_vec, &vec_idx,
+ &buf_id, &len,
+ VHOST_ACCESS_RO) < 0))
+ return -1;
+
+ len = RTE_MIN(len, size);
+ update_shadow_used_ring_packed(vq, buf_id, len, desc_count);
+ size -= len;
+
+ avail_idx += desc_count;
+ if (avail_idx >= vq->size)
+ avail_idx -= vq->size;
+
+ *nr_descs += desc_count;
+ *num_buffers += 1;
}
+ *nr_vec = vec_idx;
+
return 0;
}
static __rte_always_inline int
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
- uint16_t num_buffers)
+ uint16_t nr_vec, uint16_t num_buffers)
{
uint32_t vec_idx = 0;
- uint64_t desc_addr, desc_gaddr;
uint32_t mbuf_offset, mbuf_avail;
- uint32_t desc_offset, desc_avail;
+ uint32_t buf_offset, buf_avail;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t cpy_len;
- uint64_t desc_chunck_len;
- uint64_t hdr_addr, hdr_phys_addr;
+ uint64_t hdr_addr;
struct rte_mbuf *hdr_mbuf;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
int error = 0;
if (unlikely(m == NULL)) {
@@ -660,82 +638,61 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
goto out;
}
- desc_chunck_len = buf_vec[vec_idx].buf_len;
- desc_gaddr = buf_vec[vec_idx].buf_addr;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ if (nr_vec > 1)
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
hdr_mbuf = m;
- hdr_addr = desc_addr;
- if (unlikely(desc_chunck_len < dev->vhost_hlen))
+ hdr_addr = buf_addr;
+ if (unlikely(buf_len < dev->vhost_hlen))
hdr = &tmp_hdr;
else
hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
- hdr_phys_addr = desc_gaddr;
- rte_prefetch0((void *)(uintptr_t)hdr_addr);
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
- desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
} else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_len - dev->vhost_hlen;
}
-
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current desc buf, get the next one */
- if (desc_avail == 0) {
+ /* done with current buf, get the next one */
+ if (buf_avail == 0) {
vec_idx++;
- desc_chunck_len = buf_vec[vec_idx].buf_len;
- desc_gaddr = buf_vec[vec_idx].buf_addr;
- desc_addr =
- vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
+ if (unlikely(vec_idx >= nr_vec)) {
error = -1;
goto out;
}
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)desc_addr);
- desc_offset = 0;
- desc_avail = buf_vec[vec_idx].buf_len;
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ /* Prefetch next buffer address. */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
+ buf_offset = 0;
+ buf_avail = buf_len;
}
/* done with current mbuf, get the next one */
@@ -748,24 +705,21 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (hdr_addr) {
virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
- ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
+ if (rxvq_is_mergeable(dev))
+ ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
+ num_buffers);
if (unlikely(hdr == &tmp_hdr)) {
uint64_t len;
uint64_t remain = dev->vhost_hlen;
uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
- uint64_t guest_addr = hdr_phys_addr;
+ uint64_t iova = buf_vec[0].buf_iova;
+ uint16_t hdr_vec_idx = 0;
while (remain) {
- len = remain;
- dst = vhost_iova_to_vva(dev, vq,
- guest_addr, &len,
- VHOST_ACCESS_RW);
- if (unlikely(!dst || !len)) {
- error = -1;
- goto out;
- }
-
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ dst = buf_vec[hdr_vec_idx].buf_addr;
rte_memcpy((void *)(uintptr_t)dst,
(void *)(uintptr_t)src,
len);
@@ -773,103 +727,125 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
PRINT_PACKET(dev, (uintptr_t)dst,
(uint32_t)len, 0);
vhost_log_cache_write(dev, vq,
- guest_addr, len);
+ iova, len);
remain -= len;
- guest_addr += len;
+ iova += len;
src += len;
+ hdr_vec_idx++;
}
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, hdr_phys_addr,
+ vhost_log_cache_write(dev, vq,
+ buf_vec[0].buf_iova,
dev->vhost_hlen);
}
hdr_addr = 0;
}
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ vq->batch_copy_nb_elems >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+ vhost_log_cache_write(dev, vq, buf_iova + buf_offset,
cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
cpy_len, 0);
} else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
+ batch_copy[vq->batch_copy_nb_elems].dst =
+ (void *)((uintptr_t)(buf_addr + buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].src =
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].log_addr =
+ buf_iova + buf_offset;
+ batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
+ vq->batch_copy_nb_elems++;
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
- desc_chunck_len -= cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
}
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
static __rte_always_inline uint32_t
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
+virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count)
{
- struct vhost_virtqueue *vq;
uint32_t pkt_idx = 0;
uint16_t num_buffers;
struct buf_vector buf_vec[BUF_VECTOR_MAX];
uint16_t avail_head;
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ avail_head = *((volatile uint16_t *)&vq->avail->idx);
- vq = dev->virtqueue[queue_id];
+ for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+ uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
- rte_spinlock_lock(&vq->access_lock);
+ if (unlikely(reserve_avail_buf_split(dev, vq,
+ pkt_len, buf_vec, &num_buffers,
+ avail_head, &nr_vec) < 0)) {
+ VHOST_LOG_DEBUG(VHOST_DATA,
+ "(%d) failed to get enough desc from vring\n",
+ dev->vid);
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+ dev->vid, vq->last_avail_idx,
+ vq->last_avail_idx + num_buffers);
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
- if (count == 0)
- goto out;
+ vq->last_avail_idx += num_buffers;
+ }
- vq->batch_copy_nb_elems = 0;
+ do_data_copy_enqueue(dev, vq);
- rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ uint32_t pkt_idx = 0;
+ uint16_t num_buffers;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
- vq->shadow_used_idx = 0;
- avail_head = *((volatile uint16_t *)&vq->avail->idx);
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
+ uint16_t nr_descs = 0;
- if (unlikely(reserve_avail_buf_mergeable(dev, vq,
- pkt_len, buf_vec, &num_buffers,
- avail_head) < 0)) {
+ if (unlikely(reserve_avail_buf_packed(dev, vq,
+ pkt_len, buf_vec, &nr_vec,
+ &num_buffers, &nr_descs) < 0)) {
VHOST_LOG_DEBUG(VHOST_DATA,
"(%d) failed to get enough desc from vring\n",
dev->vid);
@@ -877,26 +853,72 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
break;
}
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
- if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx],
- buf_vec, num_buffers) < 0) {
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
- vq->last_avail_idx += num_buffers;
+ vq->last_avail_idx += nr_descs;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
do_data_copy_enqueue(dev, vq);
if (likely(vq->shadow_used_idx)) {
- flush_shadow_used_ring(dev, vq);
- vhost_vring_call(dev, vq);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
+ return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ struct vhost_virtqueue *vq;
+
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ rte_spinlock_lock(&vq->access_lock);
+
+ if (unlikely(vq->enabled == 0))
+ goto out_access_unlock;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
+ count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+ if (count == 0)
+ goto out;
+
+ if (vq_is_packed(dev))
+ count = virtio_dev_rx_packed(dev, vq, pkts, count);
+ else
+ count = virtio_dev_rx_split(dev, vq, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -904,7 +926,7 @@ out:
out_access_unlock:
rte_spinlock_unlock(&vq->access_lock);
- return pkt_idx;
+ return count;
}
uint16_t
@@ -923,10 +945,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return 0;
}
- if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
- return virtio_dev_merge_rx(dev, queue_id, pkts, count);
- else
- return virtio_dev_rx(dev, queue_id, pkts, count);
+ return virtio_dev_rx(dev, queue_id, pkts, count);
}
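
From an application's point of view the split/packed selection above is invisible; a forwarding loop just calls the burst API. A minimal sketch, assuming the usual ethdev and vhost setup was done elsewhere:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>

#define EXAMPLE_BURST 32

/* Forward one burst from a physical port into virtqueue 0, the first
 * RX queue from the guest's point of view. */
static void
example_port_to_guest(uint16_t port, int vid)
{
	struct rte_mbuf *pkts[EXAMPLE_BURST];
	uint16_t nb_rx, nb_enq, i;

	nb_rx = rte_eth_rx_burst(port, 0, pkts, EXAMPLE_BURST);
	nb_enq = rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);
	for (i = nb_enq; i < nb_rx; i++)	/* drop what didn't fit */
		rte_pktmbuf_free(pkts[i]);
}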
static inline bool
@@ -1051,76 +1070,60 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, uint16_t max_desc,
- struct rte_mbuf *m, uint16_t desc_idx,
- struct rte_mempool *mbuf_pool)
+ struct buf_vector *buf_vec, uint16_t nr_vec,
+ struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
{
- struct vring_desc *desc;
- uint64_t desc_addr, desc_gaddr;
- uint32_t desc_avail, desc_offset;
+ uint32_t buf_avail, buf_offset;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
- uint64_t desc_chunck_len;
struct rte_mbuf *cur = m, *prev = m;
struct virtio_net_hdr tmp_hdr;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid a dead loop in the desc chain */
- uint32_t nr_desc = 1;
+ uint16_t vec_idx = 0;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
int error = 0;
- desc = &descs[desc_idx];
- if (unlikely((desc->len < dev->vhost_hlen)) ||
- (desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
+ if (likely(nr_vec > 1))
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
if (virtio_net_with_host_offload(dev)) {
- if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
- uint64_t len = desc_chunck_len;
+ if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
+ uint64_t len;
uint64_t remain = sizeof(struct virtio_net_hdr);
- uint64_t src = desc_addr;
+ uint64_t src;
uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
- uint64_t guest_addr = desc_gaddr;
+ uint16_t hdr_vec_idx = 0;
/*
* No luck, the virtio-net header doesn't fit
* in a contiguous virtual area.
*/
while (remain) {
- len = remain;
- src = vhost_iova_to_vva(dev, vq,
- guest_addr, &len,
- VHOST_ACCESS_RO);
- if (unlikely(!src || !len)) {
- error = -1;
- goto out;
- }
-
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ src = buf_vec[hdr_vec_idx].buf_addr;
rte_memcpy((void *)(uintptr_t)dst,
(void *)(uintptr_t)src, len);
- guest_addr += len;
remain -= len;
dst += len;
+ hdr_vec_idx++;
}
hdr = &tmp_hdr;
} else {
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
rte_prefetch0(hdr);
}
}
@@ -1130,61 +1133,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* for Tx: the first for storing the header, and others
* for storing the data.
*/
- if (likely((desc->len == dev->vhost_hlen) &&
- (desc->flags & VRING_DESC_F_NEXT) != 0)) {
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
+ } else if (buf_len == dev->vhost_hlen) {
+ if (unlikely(++vec_idx >= nr_vec))
goto out;
- }
-
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_offset = 0;
- desc_avail = desc->len;
- nr_desc += 1;
+ buf_offset = 0;
+ buf_avail = buf_len;
} else {
- desc_avail = desc->len - dev->vhost_hlen;
-
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- } else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
- }
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
}
- rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+ rte_prefetch0((void *)(uintptr_t)
+ (buf_addr + buf_offset));
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- (uint32_t)desc_chunck_len, 0);
+ PRINT_PACKET(dev,
+ (uintptr_t)(buf_addr + buf_offset),
+ (uint32_t)buf_avail, 0);
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
uint64_t hpa;
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/*
* A desc buf might span two host physical pages that are
@@ -1192,11 +1174,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* will be copied even though zero copy is enabled.
*/
if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
- desc_gaddr + desc_offset, cpy_len)))) {
+ buf_iova + buf_offset, cpy_len)))) {
cur->data_len = cpy_len;
cur->data_off = 0;
- cur->buf_addr = (void *)(uintptr_t)(desc_addr
- + desc_offset);
+ cur->buf_addr =
+ (void *)(uintptr_t)(buf_addr + buf_offset);
cur->buf_iova = hpa;
/*
@@ -1206,81 +1188,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
mbuf_avail = cpy_len;
} else {
if (likely(cpy_len > MAX_BATCH_LEN ||
- copy_nb >= vq->size ||
- (hdr && cur == m) ||
- desc->len != desc_chunck_len)) {
+ vq->batch_copy_nb_elems >= vq->size ||
+ (hdr && cur == m))) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset),
- (void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset)),
cpy_len);
} else {
- batch_copy[copy_nb].dst =
+ batch_copy[vq->batch_copy_nb_elems].dst =
rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset);
- batch_copy[copy_nb].src =
- (void *)((uintptr_t)(desc_addr +
- desc_offset));
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].src =
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].len =
+ cpy_len;
+ vq->batch_copy_nb_elems++;
}
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_chunck_len -= cpy_len;
- desc_offset += cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
- /* This desc reaches to its end, get the next one */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+ /* This buf reaches its end, get the next one */
+ if (buf_avail == 0) {
+ if (++vec_idx >= nr_vec)
break;
- if (unlikely(desc->next >= max_desc ||
- ++nr_desc > max_desc)) {
- error = -1;
- goto out;
- }
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ /*
+ * Prefetch the desc n + 1 buffer while
+ * the desc n buffer is processed.
+ */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
- desc_offset = 0;
- desc_avail = desc->len;
-
- PRINT_PACKET(dev, (uintptr_t)desc_addr,
- (uint32_t)desc_chunck_len, 0);
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
+ buf_offset = 0;
+ buf_avail = buf_len;
- PRINT_PACKET(dev, (uintptr_t)desc_addr,
- (uint32_t)desc_chunck_len, 0);
+ PRINT_PACKET(dev, (uintptr_t)buf_addr,
+ (uint32_t)buf_avail, 0);
}
/*
@@ -1316,40 +1270,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
vhost_dequeue_offload(hdr, m);
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
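
For context on the zero-copy branch above: rather than copying, the mbuf is pointed directly at the guest buffer and the caller bumps its refcount so the descriptor stays outstanding until the mbuf is really consumed. A sketch of the attach step, with a hypothetical helper name:

#include <rte_mbuf.h>

/* Hypothetical helper mirroring the zero-copy branch: make the mbuf
 * reference guest memory directly instead of copying it. */
static void
example_attach_guest_buf(struct rte_mbuf *m, void *host_va,
			 rte_iova_t hpa, uint16_t len)
{
	m->buf_addr = host_va;	/* host virtual address of guest buf */
	m->buf_iova = hpa;	/* host physical address, usable for DMA */
	m->data_off = 0;
	m->data_len = len;
}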
-static __rte_always_inline void
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t used_idx, uint32_t desc_idx)
-{
- vq->used->ring[used_idx].id = desc_idx;
- vq->used->ring[used_idx].len = 0;
- vhost_log_cache_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
-}
-
-static __rte_always_inline void
-update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t count)
-{
- if (unlikely(count == 0))
- return;
-
- rte_smp_wmb();
- rte_smp_rmb();
-
- vhost_log_cache_sync(dev, vq);
-
- vq->used->idx += count;
- vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
- vhost_vring_call(dev, vq);
-}
-
static __rte_always_inline struct zcopy_mbuf *
get_zmbuf(struct vhost_virtqueue *vq)
{
@@ -1409,66 +1333,137 @@ restore_mbuf(struct rte_mbuf *m)
}
}
-uint16_t
-rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
- struct virtio_net *dev;
- struct rte_mbuf *rarp_mbuf = NULL;
- struct vhost_virtqueue *vq;
- uint32_t desc_indexes[MAX_PKT_BURST];
- uint32_t used_idx;
- uint32_t i = 0;
+ uint16_t i;
uint16_t free_entries;
- uint16_t avail_idx;
- dev = get_device(vid);
- if (!dev)
- return 0;
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf, *next;
- if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
- RTE_LOG(ERR, VHOST_DATA,
- "(%d) %s: built-in vhost net backend is disabled.\n",
- dev->vid, __func__);
- return 0;
- }
+ for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+ zmbuf != NULL; zmbuf = next) {
+ next = TAILQ_NEXT(zmbuf, next);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
+ if (mbuf_is_consumed(zmbuf->mbuf)) {
+ update_shadow_used_ring_split(vq,
+ zmbuf->desc_idx, 0);
+ TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+ restore_mbuf(zmbuf->mbuf);
+ rte_pktmbuf_free(zmbuf->mbuf);
+ put_zmbuf(zmbuf);
+ vq->nr_zmbuf -= 1;
+ }
+ }
+
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
}
- vq = dev->virtqueue[queue_id];
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
- if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+ vq->last_avail_idx;
+ if (free_entries == 0)
return 0;
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- vq->batch_copy_nb_elems = 0;
+ count = RTE_MIN(count, MAX_PKT_BURST);
+ count = RTE_MIN(count, free_entries);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+ dev->vid, count);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ for (i = 0; i < count; i++) {
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t head_idx, dummy_len;
+ uint16_t nr_vec = 0;
+ int err;
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (unlikely(fill_vec_buf_split(dev, vq,
+ vq->last_avail_idx + i,
+ &nr_vec, buf_vec,
+ &head_idx, &dummy_len,
+ VHOST_ACCESS_RO) < 0))
+ break;
+
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_split(vq, head_idx, 0);
+
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+ pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(pkts[i] == NULL)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to allocate memory for mbuf.\n");
+ break;
+ }
+
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
+ if (unlikely(err)) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf;
+
+ zmbuf = get_zmbuf(vq);
+ if (!zmbuf) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+ zmbuf->mbuf = pkts[i];
+ zmbuf->desc_idx = head_idx;
+
+ /*
+ * Pin the mbuf; we will check later whether the mbuf
+ * has been freed (meaning we were the last user). If
+ * so, we can then update the used ring safely.
+ */
+ rte_mbuf_refcnt_update(pkts[i], 1);
+
+ vq->nr_zmbuf += 1;
+ TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+ }
+ }
+ vq->last_avail_idx += i;
+
+ if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return i;
+}
+
+static __rte_always_inline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ uint16_t i;
+
+ rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
- int nr_updated = 0;
for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
zmbuf != NULL; zmbuf = next) {
next = TAILQ_NEXT(zmbuf, next);
if (mbuf_is_consumed(zmbuf->mbuf)) {
- used_idx = vq->last_used_idx++ & (vq->size - 1);
- update_used_ring(dev, vq, used_idx,
- zmbuf->desc_idx);
- nr_updated += 1;
+ update_shadow_used_ring_packed(vq,
+ zmbuf->desc_idx,
+ 0,
+ zmbuf->desc_count);
TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
restore_mbuf(zmbuf->mbuf);
@@ -1478,122 +1473,46 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
}
}
- update_used_idx(dev, vq, nr_updated);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
- /*
- * Construct a RARP broadcast packet, and inject it to the "pkts"
- * array, to looks like that guest actually send such packet.
- *
- * Check user_send_rarp() for more information.
- *
- * broadcast_rarp shares a cacheline in the virtio_net structure
- * with some fields that are accessed during enqueue and
- * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
- * result in false sharing between enqueue and dequeue.
- *
- * Prevent unnecessary false sharing by reading broadcast_rarp first
- * and only performing cmpset if the read indicates it is likely to
- * be set.
- */
-
- if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
- rte_atomic16_cmpset((volatile uint16_t *)
- &dev->broadcast_rarp.cnt, 1, 0))) {
-
- rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
- if (rarp_mbuf == NULL) {
- RTE_LOG(ERR, VHOST_DATA,
- "Failed to make RARP packet.\n");
- return 0;
- }
- count -= 1;
- }
-
- free_entries = *((volatile uint16_t *)&vq->avail->idx) -
- vq->last_avail_idx;
- if (free_entries == 0)
- goto out;
-
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- /* Prefetch available and used ring */
- avail_idx = vq->last_avail_idx & (vq->size - 1);
- used_idx = vq->last_used_idx & (vq->size - 1);
- rte_prefetch0(&vq->avail->ring[avail_idx]);
- rte_prefetch0(&vq->used->ring[used_idx]);
-
count = RTE_MIN(count, MAX_PKT_BURST);
- count = RTE_MIN(count, free_entries);
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
dev->vid, count);
- /* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < count; i++) {
- avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
- used_idx = (vq->last_used_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[avail_idx];
-
- if (likely(dev->dequeue_zero_copy == 0))
- update_used_ring(dev, vq, used_idx, desc_indexes[i]);
- }
-
- /* Prefetch descriptor index. */
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
- for (i = 0; i < count; i++) {
- struct vring_desc *desc, *idesc = NULL;
- uint16_t sz, idx;
- uint64_t dlen;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t buf_id, dummy_len;
+ uint16_t desc_count, nr_vec = 0;
int err;
- if (likely(i + 1 < count))
- rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-
- if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
- dlen = vq->desc[desc_indexes[i]].len;
- desc = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev, vq,
- vq->desc[desc_indexes[i]].addr,
- &dlen,
- VHOST_ACCESS_RO);
- if (unlikely(!desc))
- break;
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ vq->last_avail_idx, &desc_count,
+ buf_vec, &nr_vec,
+ &buf_id, &dummy_len,
+ VHOST_ACCESS_RW) < 0))
+ break;
- if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
- /*
- * The indirect desc table is not contiguous
- * in process VA space, we have to copy it.
- */
- idesc = alloc_copy_ind_table(dev, vq,
- &vq->desc[desc_indexes[i]]);
- if (unlikely(!idesc))
- break;
-
- desc = idesc;
- }
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_packed(vq, buf_id, 0,
+ desc_count);
- rte_prefetch0(desc);
- sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
- idx = 0;
- } else {
- desc = vq->desc;
- sz = vq->size;
- idx = desc_indexes[i];
- }
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
- free_ind_table(idesc);
break;
}
- err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
- mbuf_pool);
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
- free_ind_table(idesc);
break;
}
@@ -1603,11 +1522,11 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
zmbuf = get_zmbuf(vq);
if (!zmbuf) {
rte_pktmbuf_free(pkts[i]);
- free_ind_table(idesc);
break;
}
zmbuf->mbuf = pkts[i];
- zmbuf->desc_idx = desc_indexes[i];
+ zmbuf->desc_idx = buf_id;
+ zmbuf->desc_count = desc_count;
/*
* Pin lock the mbuf; we will check later to see
@@ -1621,17 +1540,102 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
}
- if (unlikely(!!idesc))
- free_ind_table(idesc);
+ vq->last_avail_idx += desc_count;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
- vq->last_avail_idx += i;
if (likely(dev->dequeue_zero_copy == 0)) {
do_data_copy_dequeue(vq);
- vq->last_used_idx += i;
- update_used_idx(dev, vq, i);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
+ }
+
+ return i;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev;
+ struct rte_mbuf *rarp_mbuf = NULL;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return 0;
+ }
+
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ return 0;
+
+ if (unlikely(vq->enabled == 0)) {
+ count = 0;
+ goto out_access_unlock;
}
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+
+ /*
+ * Construct a RARP broadcast packet, and inject it into the "pkts"
+ * array so it looks like the guest actually sent such a packet.
+ *
+ * Check user_send_rarp() for more information.
+ *
+ * broadcast_rarp shares a cacheline in the virtio_net structure
+ * with some fields that are accessed during enqueue and
+ * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+ * result in false sharing between enqueue and dequeue.
+ *
+ * Prevent unnecessary false sharing by reading broadcast_rarp first
+ * and only performing cmpset if the read indicates it is likely to
+ * be set.
+ */
+ if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+ rte_atomic16_cmpset((volatile uint16_t *)
+ &dev->broadcast_rarp.cnt, 1, 0))) {
+
+ rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+ if (rarp_mbuf == NULL) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to make RARP packet.\n");
+ count = 0;
+ goto out;
+ }
+ count -= 1;
+ }
+
+ if (vq_is_packed(dev))
+ count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
+ else
+ count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -1644,10 +1648,10 @@ out_access_unlock:
* Inject it to the head of "pkts" array, so that switch's mac
* learning table will get updated first.
*/
- memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+ memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
pkts[0] = rarp_mbuf;
- i += 1;
+ count += 1;
}
- return i;
+ return count;
}
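
The dequeue side mirrors the enqueue sketch shown earlier; virtqueue 1 is the guest's first TX queue. Again a hedged sketch that assumes port, device and mempool setup elsewhere:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>

#define EXAMPLE_BURST 32

/* Drain packets the guest transmitted and send them out of a
 * physical port, freeing whatever the port did not accept. */
static void
example_guest_to_port(int vid, uint16_t port, struct rte_mempool *mp)
{
	struct rte_mbuf *pkts[EXAMPLE_BURST];
	uint16_t nb, sent, i;

	nb = rte_vhost_dequeue_burst(vid, 1, mp, pkts, EXAMPLE_BURST);
	sent = rte_eth_tx_burst(port, 0, pkts, nb);
	for (i = sent; i < nb; i++)
		rte_pktmbuf_free(pkts[i]);
}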
diff --git a/lib/meson.build b/lib/meson.build
index 9d11571f..eb91f100 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -9,7 +9,8 @@
# given as a dep, no need to mention ring. This is especially true for the
# core libs which are widely reused, so their deps are kept to a minimum.
libraries = [ 'compat', # just a header, used for versioning
- 'eal', 'ring', 'mempool', 'mbuf', 'net', 'kvargs', 'ethdev', 'pci', # core
+ 'kvargs',
+ 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core
'metrics', # bitrate/latency stats depends on this
'hash', # efd depends on this
'timer', # eventdev depends on this
@@ -25,6 +26,10 @@ libraries = [ 'compat', # just a header, used for versioning
# flow_classify lib depends on pkt framework table lib
'flow_classify', 'bpf']
+default_cflags = machine_args
+if cc.has_argument('-Wno-format-truncation')
+ default_cflags += '-Wno-format-truncation'
+endif
foreach l:libraries
build = true
name = l
@@ -33,7 +38,7 @@ foreach l:libraries
sources = []
headers = []
includes = []
- cflags = machine_args
+ cflags = default_cflags
objs = [] # other object files to link against, used e.g. for
# instruction-set optimized versions of code
@@ -41,9 +46,12 @@ foreach l:libraries
# external package/library requirements
ext_deps = []
deps = ['eal'] # eal is standard dependency except for itself
- if l == 'eal'
+ if l == 'kvargs'
deps = []
endif
+ if l == 'eal'
+ deps = ['kvargs']
+ endif
dir_name = 'librte_' + l
subdir(dir_name)
@@ -63,6 +71,10 @@ foreach l:libraries
shared_deps = ext_deps
static_deps = ext_deps
foreach d:deps
+ if not is_variable('shared_rte_' + d)
+ error('Missing dependency ' + d +
+ ' for library ' + lib_name)
+ endif
shared_deps += [get_variable('shared_rte_' + d)]
static_deps += [get_variable('static_rte_' + d)]
endforeach