Imported Upstream version 16.07-rc1

Change-Id: I40a523e52f12e8496fdd69e902824b0226c303de Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
author: Christian Ehrhardt <christian.ehrhardt@canonical.com> 2016-07-06 09:22:35 +0200
committer: Christian Ehrhardt <christian.ehrhardt@canonical.com> 2016-07-06 16:09:40 +0200
commit: 8b25d1ad5d2264bdfc2818c7bda74ee2697df6db (patch)
tree: 8c3c769777f7e66a2d1ba7dd7651b563cfde370b /lib
parent: 97f17497d162afdb82c8704bf097f0fee3724b2e (diff)
137 files changed, 7025 insertions, 2902 deletions
diff --git a/lib/Makefile b/lib/Makefile
index f254dba8..ca7c02fd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -57,6 +57,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
 DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
 DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
 DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
+DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 2e394c97..9803e9dd 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -73,7 +73,7 @@ else
 	$(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
 	grep -q AVX2 && echo 1)
 	ifeq ($(CC_AVX2_SUPPORT), 1)
-		ifeq ($(CC), icc)
+		ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
 		CFLAGS_acl_run_avx2.o += -march=core-avx2
 		else
 		CFLAGS_acl_run_avx2.o += -mavx2
diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c
index 75625a28..d72052a0 100644
--- a/lib/librte_cfgfile/rte_cfgfile.c
+++ b/lib/librte_cfgfile/rte_cfgfile.c
@@ -232,6 +232,7 @@ rte_cfgfile_load(const char *filename, int flags)
 	return cfg;
 
 error1:
+	cfg->num_sections = curr_section + 1;
 	rte_cfgfile_close(cfg);
 error2:
 	fclose(f);
diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h
index 834f8287..f649836c 100644
--- a/lib/librte_cfgfile/rte_cfgfile.h
+++ b/lib/librte_cfgfile/rte_cfgfile.h
@@ -72,7 +72,7 @@ struct rte_cfgfile_entry {
 * @param flags
 *   Config file flags, Reserved for future use. Must be set to 0.
 * @return
-*   Handle to configuration file
+*   Handle to configuration file on success, NULL otherwise
 */
 struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags);
 
diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c
index c405878e..a9c47be3 100644
--- a/lib/librte_cmdline/cmdline.c
+++ b/lib/librte_cmdline/cmdline.c
@@ -130,6 +130,7 @@ struct cmdline *
 cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out)
 {
 	struct cmdline *cl;
+	int ret;
 
 	if (!ctx || !prompt)
 		return NULL;
@@ -142,8 +143,13 @@ cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out)
 	cl->s_out = s_out;
 	cl->ctx = ctx;
 
-	rdline_init(&cl->rdl, cmdline_write_char,
-		    cmdline_valid_buffer, cmdline_complete_buffer);
+	ret = rdline_init(&cl->rdl, cmdline_write_char, cmdline_valid_buffer,
+			cmdline_complete_buffer);
+	if (ret != 0) {
+		free(cl);
+		return NULL;
+	}
+
 	cl->rdl.opaque = cl;
 	cmdline_set_prompt(cl, prompt);
 	rdline_newline(&cl->rdl, cl->prompt);
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index 24a6ed67..b496067a 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -118,6 +118,14 @@ cmdline_isendoftoken(char c)
 	return 0;
 }
 
+int
+cmdline_isendofcommand(char c)
+{
+	if (!c || iscomment(c) || isendofline(c))
+		return 1;
+	return 0;
+}
+
 static unsigned int
 nb_common_chars(const char * s1, const char * s2)
 {
diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h
index 4b25c456..4ac05d6b 100644
--- a/lib/librte_cmdline/cmdline_parse.h
+++ b/lib/librte_cmdline/cmdline_parse.h
@@ -184,6 +184,9 @@ int cmdline_complete(struct cmdline *cl, const char *buf, int *state,
  * isendofline(c)) */
 int cmdline_isendoftoken(char c);
 
+/* return true if(!c || iscomment(c) || isendofline(c)) */
+int cmdline_isendofcommand(char c);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_cmdline/cmdline_parse_string.c b/lib/librte_cmdline/cmdline_parse_string.c
index 45883b3e..35917a7b 100644
--- a/lib/librte_cmdline/cmdline_parse_string.c
+++ b/lib/librte_cmdline/cmdline_parse_string.c
@@ -76,9 +76,10 @@ struct cmdline_token_ops cmdline_token_string_ops = {
 	.get_help = cmdline_get_help_string,
 };
 
-#define MULTISTRING_HELP "Mul-choice STRING"
-#define ANYSTRING_HELP   "Any STRING"
-#define FIXEDSTRING_HELP "Fixed STRING"
+#define CHOICESTRING_HELP "Mul-choice STRING"
+#define ANYSTRING_HELP    "Any STRING"
+#define ANYSTRINGS_HELP   "Any STRINGS"
+#define FIXEDSTRING_HELP  "Fixed STRING"
 
 static unsigned int
 get_token_len(const char *s)
@@ -123,8 +124,8 @@ cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
 
 	sd = &tk2->string_data;
 
-	/* fixed string */
-	if (sd->str) {
+	/* fixed string (known single token) */
+	if ((sd->str != NULL) && (strcmp(sd->str, TOKEN_STRING_MULTI) != 0)) {
 		str = sd->str;
 		do {
 			token_len = get_token_len(str);
@@ -148,7 +149,21 @@ cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
 		if (!str)
 			return -1;
 	}
-	/* unspecified string */
+	/* multi string */
+	else if (sd->str != NULL) {
+		if (ressize < STR_MULTI_TOKEN_SIZE)
+			return -1;
+
+		token_len = 0;
+		while (!cmdline_isendofcommand(buf[token_len]) &&
+		      token_len < (STR_MULTI_TOKEN_SIZE - 1))
+			token_len++;
+
+		/* return if token too long */
+		if (token_len >= (STR_MULTI_TOKEN_SIZE - 1))
+			return -1;
+	}
+	/* unspecified string (unknown single token) */
 	else {
 		token_len = 0;
 		while(!cmdline_isendoftoken(buf[token_len]) &&
@@ -162,12 +177,16 @@ cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
 	}
 
 	if (res) {
-		/* we are sure that token_len is < STR_TOKEN_SIZE-1 */
-		snprintf(res, STR_TOKEN_SIZE, "%s", buf);
+		if ((sd->str != NULL) && (strcmp(sd->str, TOKEN_STRING_MULTI) == 0))
+			/* we are sure that token_len is < STR_MULTI_TOKEN_SIZE-1 */
+			snprintf(res, STR_MULTI_TOKEN_SIZE, "%s", buf);
+		else
+			/* we are sure that token_len is < STR_TOKEN_SIZE-1 */
+			snprintf(res, STR_TOKEN_SIZE, "%s", buf);
+
 		*((char *)res + token_len) = 0;
 	}
 
-
 	return token_len;
 }
 
@@ -242,8 +261,10 @@ int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf,
 	s = sd->str;
 
 	if (s) {
-		if (get_next_token(s))
-			snprintf(dstbuf, size, MULTISTRING_HELP);
+		if (strcmp(s, TOKEN_STRING_MULTI) == 0)
+			snprintf(dstbuf, size, ANYSTRINGS_HELP);
+		else if (get_next_token(s))
+			snprintf(dstbuf, size, CHOICESTRING_HELP);
 		else
 			snprintf(dstbuf, size, FIXEDSTRING_HELP);
 	} else
diff --git a/lib/librte_cmdline/cmdline_parse_string.h b/lib/librte_cmdline/cmdline_parse_string.h
index 94aa1f1b..a84291b0 100644
--- a/lib/librte_cmdline/cmdline_parse_string.h
+++ b/lib/librte_cmdline/cmdline_parse_string.h
@@ -70,8 +70,13 @@ extern "C" {
 /* size of a parsed string */
 #define STR_TOKEN_SIZE 128
 
+/* size of a parsed multi string */
+#define STR_MULTI_TOKEN_SIZE 4096
+
 typedef char cmdline_fixed_string_t[STR_TOKEN_SIZE];
 
+typedef char cmdline_multi_string_t[STR_MULTI_TOKEN_SIZE];
+
 struct cmdline_token_string_data {
 	const char *str;
 };
@@ -92,6 +97,16 @@ int cmdline_complete_get_elt_string(cmdline_parse_token_hdr_t *tk, int idx,
 int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf,
 			    unsigned int size);
 
+/**
+* Token marked as TOKEN_STRING_MULTI takes entire parsing string
+* until “#” sign appear. Everything after “#” sign is treated as a comment.
+*
+* Note:
+* In this case second parameter of TOKEN_STRING_INITIALIZER must be a type of
+* cmdline_multi_string_t.
+*/
+#define TOKEN_STRING_MULTI ""
+
 #define TOKEN_STRING_INITIALIZER(structure, field, string)  \
 {                                                           \
 	/* hdr */                                               \
diff --git a/lib/librte_cmdline/rte_cmdline_version.map b/lib/librte_cmdline/rte_cmdline_version.map
index c9fc18ab..04bcb387 100644
--- a/lib/librte_cmdline/rte_cmdline_version.map
+++ b/lib/librte_cmdline/rte_cmdline_version.map
@@ -50,7 +50,6 @@ DPDK_2.0 {
 	cmdline_token_num_ops;
 	cmdline_token_portlist_ops;
 	cmdline_token_string_ops;
-	cmdline_token_string_ops;
 	cmdline_write_char;
 	rdline_add_history;
 	rdline_char_in;
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index 4ae9b9e8..d9bd8210 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -388,7 +388,8 @@ struct rte_crypto_sym_op {
 			  * this location.
 			  *
 			  * @note
-			  * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+			  * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2
+			  * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
 			  * this field should be in bits.
 			  */
 
@@ -413,6 +414,7 @@ struct rte_crypto_sym_op {
 			  *
 			  * @note
 			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2
+			  * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
 			  * this field should be in bits.
 			  */
 		} data; /**< Data offsets and length for ciphering */
@@ -485,6 +487,7 @@ struct rte_crypto_sym_op {
 			  *
 			  * @note
 			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
+			  * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
 			  * this field should be in bits.
 			  */
 
@@ -504,6 +507,7 @@ struct rte_crypto_sym_op {
 			  *
 			  * @note
 			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
+			  * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
 			  * this field should be in bits.
 			  */
 		} data; /**< Data offsets and length for authentication */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index aa4ea425..20e5beb8 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -102,6 +102,97 @@ struct rte_cryptodev_callback {
 	uint32_t active;			/**< Callback is executing */
 };
 
+#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG		("max_nb_queue_pairs")
+#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG		("max_nb_sessions")
+#define RTE_CRYPTODEV_VDEV_SOCKET_ID			("socket_id")
+
+static const char *cryptodev_vdev_valid_params[] = {
+	RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+	RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+	RTE_CRYPTODEV_VDEV_SOCKET_ID
+};
+
+static uint8_t
+number_of_sockets(void)
+{
+	int sockets = 0;
+	int i;
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+
+	for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
+		if (sockets < ms[i].socket_id)
+			sockets = ms[i].socket_id;
+	}
+
+	/* Number of sockets = maximum socket_id + 1 */
+	return ++sockets;
+}
+
+/** Parse integer from integer argument */
+static int
+parse_integer_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	int *i = (int *) extra_args;
+
+	*i = atoi(value);
+	if (*i < 0) {
+		CDEV_LOG_ERR("Argument has to be positive.");
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params,
+		const char *input_args)
+{
+	struct rte_kvargs *kvlist;
+	int ret;
+
+	if (params == NULL)
+		return -EINVAL;
+
+	if (input_args) {
+		kvlist = rte_kvargs_parse(input_args,
+				cryptodev_vdev_valid_params);
+		if (kvlist == NULL)
+			return -1;
+
+		ret = rte_kvargs_process(kvlist,
+					RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
+					&parse_integer_arg,
+					&params->max_nb_queue_pairs);
+		if (ret < 0)
+			goto free_kvlist;
+
+		ret = rte_kvargs_process(kvlist,
+					RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
+					&parse_integer_arg,
+					&params->max_nb_sessions);
+		if (ret < 0)
+			goto free_kvlist;
+
+		ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
+					&parse_integer_arg,
+					&params->socket_id);
+		if (ret < 0)
+			goto free_kvlist;
+
+		if (params->socket_id >= number_of_sockets()) {
+			CDEV_LOG_ERR("Invalid socket id specified to create "
+				"the virtual crypto device on");
+			goto free_kvlist;
+		}
+	}
+
+	return 0;
+
+free_kvlist:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
 
 const char *
 rte_cryptodev_get_feature_name(uint64_t flag)
@@ -956,7 +1047,7 @@ rte_cryptodev_sym_session_init(struct rte_mempool *mp,
 	sess->mp = mp;
 
 	if (dev->dev_ops->session_initialize)
-		(*dev->dev_ops->session_initialize)(mp, sess->_private);
+		(*dev->dev_ops->session_initialize)(mp, sess);
 }
 
 static int
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index d47f1e87..7768f0ae 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -59,12 +59,15 @@ extern "C" {
 /**< Intel QAT Symmetric Crypto PMD device name */
 #define CRYPTODEV_NAME_SNOW3G_PMD	("cryptodev_snow3g_pmd")
 /**< SNOW 3G PMD device name */
+#define CRYPTODEV_NAME_KASUMI_PMD	("cryptodev_kasumi_pmd")
+/**< KASUMI PMD device name */
 
 /** Crypto device type */
 enum rte_cryptodev_type {
 	RTE_CRYPTODEV_NULL_PMD = 1,	/**< Null crypto PMD */
 	RTE_CRYPTODEV_AESNI_GCM_PMD,	/**< AES-NI GCM PMD */
 	RTE_CRYPTODEV_AESNI_MB_PMD,	/**< AES-NI multi buffer PMD */
+	RTE_CRYPTODEV_KASUMI_PMD,	/**< KASUMI PMD */
 	RTE_CRYPTODEV_QAT_SYM_PMD,	/**< QAT PMD Symmetric Crypto */
 	RTE_CRYPTODEV_SNOW3G_PMD,	/**< SNOW 3G PMD */
 };
@@ -297,48 +300,6 @@ struct rte_crypto_vdev_init_params {
 	uint8_t socket_id;
 };
 
-#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG		("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG		("max_nb_sessions")
-#define RTE_CRYPTODEV_VDEV_SOCKET_ID			("socket_id")
-
-static const char *cryptodev_vdev_valid_params[] = {
-	RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
-	RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
-	RTE_CRYPTODEV_VDEV_SOCKET_ID
-};
-
-static inline uint8_t
-number_of_sockets(void)
-{
-	int sockets = 0;
-	int i;
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-
-	for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) {
-		if (sockets < ms[i].socket_id)
-			sockets = ms[i].socket_id;
-	}
-
-	/* Number of sockets = maximum socket_id + 1 */
-	return ++sockets;
-}
-
-/** Parse integer from integer argument */
-static inline int
-__rte_cryptodev_parse_integer_arg(const char *key __rte_unused,
-		const char *value, void *extra_args)
-{
-	int *i = (int *) extra_args;
-
-	*i = atoi(value);
-	if (*i < 0) {
-		CDEV_LOG_ERR("Argument has to be positive.");
-		return -1;
-	}
-
-	return 0;
-}
-
 /**
  * Parse virtual device initialisation parameters input arguments
  * @internal
@@ -350,55 +311,10 @@ __rte_cryptodev_parse_integer_arg(const char *key __rte_unused,
  * 0 on successful parse
  * <0 on failure to parse
  */
-static inline int
-rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params,
-		const char *input_args)
-{
-	struct rte_kvargs *kvlist;
-	int ret;
-
-	if (params == NULL)
-		return -EINVAL;
-
-	if (input_args) {
-		kvlist = rte_kvargs_parse(input_args,
-				cryptodev_vdev_valid_params);
-		if (kvlist == NULL)
-			return -1;
-
-		ret = rte_kvargs_process(kvlist,
-					RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG,
-					&__rte_cryptodev_parse_integer_arg,
-					&params->max_nb_queue_pairs);
-		if (ret < 0)
-			goto free_kvlist;
-
-		ret = rte_kvargs_process(kvlist,
-					RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG,
-					&__rte_cryptodev_parse_integer_arg,
-					&params->max_nb_sessions);
-		if (ret < 0)
-			goto free_kvlist;
-
-		ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID,
-					&__rte_cryptodev_parse_integer_arg,
-					&params->socket_id);
-		if (ret < 0)
-			goto free_kvlist;
-
-		if (params->socket_id >= number_of_sockets()) {
-			CDEV_LOG_ERR("Invalid socket id specified to create "
-				"the virtual crypto device on");
-			goto free_kvlist;
-		}
-	}
-
-	return 0;
-
-free_kvlist:
-	rte_kvargs_free(kvlist);
-	return ret;
-}
+int
+rte_cryptodev_parse_vdev_init_params(
+		struct rte_crypto_vdev_init_params *params,
+		const char *input_args);
 
 /**
  * Create a virtual crypto device
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index 41004e1c..a08fd202 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -32,3 +32,10 @@ DPDK_16.04 {
 
 	local: *;
 };
+
+DPDK_16.07 {
+	global:
+
+	rte_cryptodev_parse_vdev_init_params;
+
+} DPDK_16.04;
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 9054ad61..698fa0a1 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -40,8 +40,6 @@ VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
 CFLAGS += -I$(SRCDIR)/include
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
-CFLAGS += -I$(RTE_SDK)/lib/librte_ring
-CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
 CFLAGS += $(WERROR_FLAGS) -O3
 
 LDLIBS += -lexecinfo
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 06bfd4e0..a0c8f8c8 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -605,7 +605,7 @@ rte_eal_init(int argc, char **argv)
 		/* Set thread_name for aid in debugging. */
 		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
 				"lcore-slave-%d", i);
-		pthread_set_name_np(lcore_config[i].thread_id, thread_name);
+		rte_thread_setname(lcore_config[i].thread_id, thread_name);
 	}
 
 	/*
diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c
index 907fbfa7..5fbc17c5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_debug.c
+++ b/lib/librte_eal/bsdapp/eal/eal_debug.c
@@ -77,9 +77,6 @@ void __rte_panic(const char *funcname, const char *format, ...)
 {
 	va_list ap;
 
-	/* disable history */
-	rte_log_set_history(0);
-
 	rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
 	va_start(ap, format);
 	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
@@ -98,9 +95,6 @@ rte_exit(int exit_code, const char *format, ...)
 {
 	va_list ap;
 
-	/* disable history */
-	rte_log_set_history(0);
-
 	if (exit_code != 0)
 		RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
 				"  Cause: ", exit_code);
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 2d16d782..374b68f2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -278,6 +278,11 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
 	/* get subsystem_device id */
 	dev->id.subsystem_device_id = conf->pc_subdevice;
 
+	/* get class id */
+	dev->id.class_id = (conf->pc_class << 16) |
+			   (conf->pc_subclass << 8) |
+			   (conf->pc_progif);
+
 	/* TODO: get max_vfs */
 	dev->max_vfs = 0;
 
@@ -422,7 +427,7 @@ int rte_eal_pci_read_config(const struct rte_pci_device *dev,
 		goto error;
 	}
 
-	fd = open("/dev/pci", O_RDONLY);
+	fd = open("/dev/pci", O_RDWR);
 	if (fd < 0) {
 		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
 		goto error;
@@ -466,7 +471,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *dev,
 
 	memcpy(&pi.pi_data, buf, len);
 
-	fd = open("/dev/pci", O_RDONLY);
+	fd = open("/dev/pci", O_RDWR);
 	if (fd < 0) {
 		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
 		goto error;
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index 9a034373..1b8cd8a6 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -199,3 +199,10 @@ int rte_sys_gettid(void)
 	thr_self(&lwpid);
 	return (int)lwpid;
 }
+
+int rte_thread_setname(pthread_t id, const char *name)
+{
+	/* this BSD function returns no error */
+	pthread_set_name_np(id, name);
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index 58c2951e..1852c4a4 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -151,3 +151,13 @@ DPDK_16.04 {
 	rte_eal_primary_proc_alive;
 
 } DPDK_2.2;
+
+DPDK_16.07 {
+	global:
+
+	pci_get_sysfs_path;
+	rte_keepalive_mark_sleep;
+	rte_keepalive_register_relay_callback;
+	rte_thread_setname;
+
+} DPDK_16.04;
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index 2bfe54a1..e403717b 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -58,7 +58,7 @@ rte_eal_parse_devargs_str(const char *devargs_str,
 		return -1;
 
 	*drvname = strdup(devargs_str);
-	if (drvname == NULL)
+	if (*drvname == NULL)
 		return -1;
 
 	/* set the first ',' to '\0' to split name and arguments */
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index a4263ba5..2cd41320 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -104,7 +104,7 @@ rte_eal_cpu_init(void)
 	RTE_LOG(DEBUG, EAL,
 		"Support maximum %u logical core(s) by configuration.\n",
 		RTE_MAX_LCORE);
-	RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+	RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count);
 
 	return 0;
 }
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 1ae8de70..7916c781 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -31,54 +31,16 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <string.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdarg.h>
-#include <sys/types.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <sys/queue.h>
 
 #include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_launch.h>
-#include <rte_common.h>
-#include <rte_cycles.h>
-#include <rte_eal.h>
 #include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_atomic.h>
-#include <rte_debug.h>
-#include <rte_spinlock.h>
-#include <rte_branch_prediction.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
 
 #include "eal_private.h"
 
-#define LOG_ELT_SIZE     2048
-
-#define LOG_HISTORY_MP_NAME "log_history"
-
-STAILQ_HEAD(log_history_list, log_history);
-
-/**
- * The structure of a message log in the log history.
- */
-struct log_history {
-	STAILQ_ENTRY(log_history) next;
-	unsigned size;
-	char buf[0];
-};
-
-static struct rte_mempool *log_history_mp = NULL;
-static unsigned log_history_size = 0;
-static struct log_history_list log_history;
-
 /* global log structure */
 struct rte_logs rte_logs = {
 	.type = ~0,
@@ -86,10 +48,7 @@ struct rte_logs rte_logs = {
 	.file = NULL,
 };
 
-static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER;
-static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER;
 static FILE *default_log_stream;
-static int history_enabled = 1;
 
 /**
  * This global structure stores some informations about the message
@@ -98,66 +57,24 @@ static int history_enabled = 1;
 struct log_cur_msg {
 	uint32_t loglevel; /**< log level - see rte_log.h */
 	uint32_t logtype;  /**< log type  - see rte_log.h */
-} __rte_cache_aligned;
-static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */
+};
 
+ /* per core log */
+static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
 
 /* default logs */
 
 int
-rte_log_add_in_history(const char *buf, size_t size)
+rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused)
 {
-	struct log_history *hist_buf = NULL;
-	static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf);
-	void *obj;
-
-	if (history_enabled == 0)
-		return 0;
-
-	rte_spinlock_lock(&log_list_lock);
-
-	/* get a buffer for adding in history */
-	if (log_history_size > RTE_LOG_HISTORY) {
-		hist_buf = STAILQ_FIRST(&log_history);
-		if (hist_buf) {
-			STAILQ_REMOVE_HEAD(&log_history, next);
-			log_history_size--;
-		}
-	}
-	else {
-		if (rte_mempool_mc_get(log_history_mp, &obj) < 0)
-			obj = NULL;
-		hist_buf = obj;
-	}
-
-	/* no buffer */
-	if (hist_buf == NULL) {
-		rte_spinlock_unlock(&log_list_lock);
-		return -ENOBUFS;
-	}
-
-	/* not enough room for msg, buffer go back in mempool */
-	if (size >= hist_buf_size) {
-		rte_mempool_mp_put(log_history_mp, hist_buf);
-		rte_spinlock_unlock(&log_list_lock);
-		return -ENOBUFS;
-	}
-
-	/* add in history */
-	memcpy(hist_buf->buf, buf, size);
-	hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0';
-	hist_buf->size = size;
-	STAILQ_INSERT_TAIL(&log_history, hist_buf, next);
-	log_history_size++;
-	rte_spinlock_unlock(&log_list_lock);
-
 	return 0;
 }
 
 void
 rte_log_set_history(int enable)
 {
-	history_enabled = enable;
+	if (enable)
+		RTE_LOG(WARNING, EAL, "The log history is deprecated.\n");
 }
 
 /* Change the stream that will be used by logging system */
@@ -205,63 +122,19 @@ rte_get_log_type(void)
 /* get the current loglevel for the message beeing processed */
 int rte_log_cur_msg_loglevel(void)
 {
-	unsigned lcore_id;
-	lcore_id = rte_lcore_id();
-	if (lcore_id >= RTE_MAX_LCORE)
-		return rte_get_log_level();
-	return log_cur_msg[lcore_id].loglevel;
+	return RTE_PER_LCORE(log_cur_msg).loglevel;
 }
 
 /* get the current logtype for the message beeing processed */
 int rte_log_cur_msg_logtype(void)
 {
-	unsigned lcore_id;
-	lcore_id = rte_lcore_id();
-	if (lcore_id >= RTE_MAX_LCORE)
-		return rte_get_log_type();
-	return log_cur_msg[lcore_id].logtype;
+	return RTE_PER_LCORE(log_cur_msg).logtype;
 }
 
 /* Dump log history to file */
 void
-rte_log_dump_history(FILE *out)
+rte_log_dump_history(FILE *out __rte_unused)
 {
-	struct log_history_list tmp_log_history;
-	struct log_history *hist_buf;
-	unsigned i;
-
-	/* only one dump at a time */
-	rte_spinlock_lock(&log_dump_lock);
-
-	/* save list, and re-init to allow logging during dump */
-	rte_spinlock_lock(&log_list_lock);
-	tmp_log_history = log_history;
-	STAILQ_INIT(&log_history);
-	log_history_size = 0;
-	rte_spinlock_unlock(&log_list_lock);
-
-	for (i=0; i<RTE_LOG_HISTORY; i++) {
-
-		/* remove one message from history list */
-		hist_buf = STAILQ_FIRST(&tmp_log_history);
-
-		if (hist_buf == NULL)
-			break;
-
-		STAILQ_REMOVE_HEAD(&tmp_log_history, next);
-
-		/* write on stdout */
-		if (fwrite(hist_buf->buf, hist_buf->size, 1, out) == 0) {
-			rte_mempool_mp_put(log_history_mp, hist_buf);
-			break;
-		}
-
-		/* put back message structure in pool */
-		rte_mempool_mp_put(log_history_mp, hist_buf);
-	}
-	fflush(out);
-
-	rte_spinlock_unlock(&log_dump_lock);
 }
 
 /*
@@ -273,17 +146,13 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
 {
 	int ret;
 	FILE *f = rte_logs.file;
-	unsigned lcore_id;
 
 	if ((level > rte_logs.level) || !(logtype & rte_logs.type))
 		return 0;
 
 	/* save loglevel and logtype in a global per-lcore variable */
-	lcore_id = rte_lcore_id();
-	if (lcore_id < RTE_MAX_LCORE) {
-		log_cur_msg[lcore_id].loglevel = level;
-		log_cur_msg[lcore_id].logtype = logtype;
-	}
+	RTE_PER_LCORE(log_cur_msg).loglevel = level;
+	RTE_PER_LCORE(log_cur_msg).logtype = logtype;
 
 	ret = vfprintf(f, format, ap);
 	fflush(f);
@@ -308,30 +177,17 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
 }
 
 /*
- * called by environment-specific log init function to initialize log
- * history
+ * called by environment-specific log init function
  */
 int
 rte_eal_common_log_init(FILE *default_log)
 {
-	STAILQ_INIT(&log_history);
-
-	/* reserve RTE_LOG_HISTORY*2 elements, so we can dump and
-	 * keep logging during this time */
-	log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2,
-				LOG_ELT_SIZE, 0, 0,
-				NULL, NULL,
-				NULL, NULL,
-				SOCKET_ID_ANY, 0);
-
-	if ((log_history_mp == NULL) &&
-	    ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){
-		RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n",
-			__func__);
-		return -1;
-	}
-
 	default_log_stream = default_log;
 	rte_openlog_stream(default_log);
+
+#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+	RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n");
+#endif
+
 	return 0;
 }
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 711c8457..5d28341f 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -119,6 +119,9 @@ find_heap_max_free_elem(int *s, unsigned align)
 		}
 	}
 
+	if (len < MALLOC_ELEM_OVERHEAD + align)
+		return 0;
+
 	return len - MALLOC_ELEM_OVERHEAD - align;
 }
 
@@ -126,6 +129,7 @@ static const struct rte_memzone *
 memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		int socket_id, unsigned flags, unsigned align, unsigned bound)
 {
+	struct rte_memzone *mz;
 	struct rte_mem_config *mcfg;
 	size_t requested_len;
 	int socket, i;
@@ -148,6 +152,13 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		return NULL;
 	}
 
+	if (strlen(name) >= sizeof(mz->name) - 1) {
+		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n",
+			__func__, name);
+		rte_errno = EEXIST;
+		return NULL;
+	}
+
 	/* if alignment is not a power of two */
 	if (align && !rte_is_power_of_2(align)) {
 		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
@@ -189,8 +200,13 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	if (len == 0) {
 		if (bound != 0)
 			requested_len = bound;
-		else
+		else {
 			requested_len = find_heap_max_free_elem(&socket_id, align);
+			if (requested_len == 0) {
+				rte_errno = ENOMEM;
+				return NULL;
+			}
+		}
 	}
 
 	if (socket_id == SOCKET_ID_ANY)
@@ -223,7 +239,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
 
 	/* fill the zone in config */
-	struct rte_memzone *mz = get_next_free_memzone();
+	mz = get_next_free_memzone();
 
 	if (mz == NULL) {
 		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
@@ -321,15 +337,19 @@ rte_memzone_free(const struct rte_memzone *mz)
 	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
 	idx = idx / sizeof(struct rte_memzone);
 
-	addr = mcfg->memzone[idx].addr;
 #ifdef RTE_LIBRTE_IVSHMEM
 	/*
 	 * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
 	 * free it.
 	 */
-	if (mcfg->memzone[idx].ioremap_addr != 0)
-		ret = -EINVAL;
+	if (mcfg->memzone[idx].ioremap_addr != 0) {
+		rte_rwlock_write_unlock(&mcfg->mlock);
+		return -EINVAL;
+	}
 #endif
+
+	addr = mcfg->memzone[idx].addr;
+
 	if (addr == NULL)
 		ret = -EINVAL;
 	else if (mcfg->memzone_cnt == 0) {
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 2b418d52..3efc90f0 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -139,7 +139,11 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 
 	internal_cfg->syslog_facility = LOG_DAEMON;
 	/* default value from build option */
+#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+	internal_cfg->log_level = RTE_LOG_INFO;
+#else
 	internal_cfg->log_level = RTE_LOG_LEVEL;
+#endif
 
 	internal_cfg->xen_dom0_support = 0;
 
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 40f49229..7248c38b 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -85,6 +85,19 @@
 struct pci_driver_list pci_driver_list;
 struct pci_device_list pci_device_list;
 
+#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+
+const char *pci_get_sysfs_path(void)
+{
+	const char *path = NULL;
+
+	path = getenv("SYSFS_PCI_DEVICES");
+	if (path == NULL)
+		return SYSFS_PCI_DEVICES;
+
+	return path;
+}
+
 static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
 {
 	struct rte_devargs *devargs;
@@ -162,23 +175,26 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
 		if (id_table->subsystem_device_id != dev->id.subsystem_device_id &&
 				id_table->subsystem_device_id != PCI_ANY_ID)
 			continue;
+		if (id_table->class_id != dev->id.class_id &&
+				id_table->class_id != RTE_CLASS_ANY_ID)
+			continue;
 
 		struct rte_pci_addr *loc = &dev->addr;
 
-		RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
+		RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
 				loc->domain, loc->bus, loc->devid, loc->function,
 				dev->numa_node);
 
-		RTE_LOG(DEBUG, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
-				dev->id.device_id, dr->name);
-
 		/* no initialization when blacklisted, return without error */
 		if (dev->devargs != NULL &&
 			dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
-			RTE_LOG(DEBUG, EAL, "  Device is blacklisted, not initializing\n");
+			RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
 			return 1;
 		}
 
+		RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
+				dev->id.device_id, dr->name);
+
 		if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
 			/* map resources for devices that use igb_uio */
 			ret = rte_eal_pci_map_device(dev);
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
index f062e81d..367a6816 100644
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -53,7 +53,7 @@ EAL_REGISTER_TAILQ(rte_uio_tailq)
 static int
 pci_uio_map_secondary(struct rte_pci_device *dev)
 {
-	int fd, i;
+	int fd, i, j;
 	struct mapped_pci_resource *uio_res;
 	struct mapped_pci_res_list *uio_res_list =
 			RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
@@ -85,6 +85,16 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
 					"Cannot mmap device resource file %s to address: %p\n",
 					uio_res->maps[i].path,
 					uio_res->maps[i].addr);
+				if (mapaddr != MAP_FAILED) {
+					/* unmap addrs correctly mapped */
+					for (j = 0; j < i; j++)
+						pci_unmap_resource(
+							uio_res->maps[j].addr,
+							(size_t)uio_res->maps[j].size);
+					/* unmap addr wrongly mapped */
+					pci_unmap_resource(mapaddr,
+						(size_t)uio_res->maps[i].size);
+				}
 				return -1;
 			}
 		}
@@ -159,7 +169,8 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
 	for (i = 0; i != uio_res->nb_maps; i++) {
 		pci_unmap_resource(uio_res->maps[i].addr,
 				(size_t)uio_res->maps[i].size);
-		rte_free(uio_res->maps[i].path);
+		if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+			rte_free(uio_res->maps[i].path);
 	}
 }
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 2342fa16..857dc3ea 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -49,9 +49,6 @@ int rte_eal_memzone_init(void);
 /**
  * Common log initialization function (private to eal).
  *
- * Called by environment-specific log initialization function to initialize
- * log history.
- *
  * @param default_log
  *   The default log stream to be used.
  * @return
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index 988125b3..da6c233a 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -323,12 +323,6 @@ rte_memcpy(void *dst, const void *src, size_t n)
 	return memcpy(dst, src, n);
 }
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #endif /* RTE_ARCH_ARM_NEON_MEMCPY */
 
 #ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
index 917cdc1b..5db66b63 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
@@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
index 9b5b37ef..e606957c 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
@@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index f463ab30..413035e7 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -363,71 +363,26 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
 }
 
 /**
- * Copy 256 bytes from one location to another,
- * locations should not overlap.
- */
-static inline void
-rte_mov256(uint8_t *dst, const uint8_t *src)
-{
-	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
-	rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
-	rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
-	rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
-	rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32);
-	rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32);
-	rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32);
-	rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32);
-}
-
-/**
- * Copy 64-byte blocks from one location to another,
- * locations should not overlap.
- */
-static inline void
-rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n)
-{
-	__m256i ymm0, ymm1;
-
-	while (n >= 64) {
-		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
-		n -= 64;
-		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
-		src = (const uint8_t *)src + 64;
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
-		dst = (uint8_t *)dst + 64;
-	}
-}
-
-/**
- * Copy 256-byte blocks from one location to another,
+ * Copy 128-byte blocks from one location to another,
  * locations should not overlap.
  */
 static inline void
-rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n)
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 {
-	__m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+	__m256i ymm0, ymm1, ymm2, ymm3;
 
-	while (n >= 256) {
+	while (n >= 128) {
 		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
-		n -= 256;
+		n -= 128;
 		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
 		ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
 		ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
-		ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
-		ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
-		ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
-		ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
-		src = (const uint8_t *)src + 256;
+		src = (const uint8_t *)src + 128;
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
-		dst = (uint8_t *)dst + 256;
+		dst = (uint8_t *)dst + 128;
 	}
 }
 
@@ -466,51 +421,56 @@ rte_memcpy(void *dst, const void *src, size_t n)
 	}
 
 	/**
-	 * Fast way when copy size doesn't exceed 512 bytes
+	 * Fast way when copy size doesn't exceed 256 bytes
 	 */
 	if (n <= 32) {
 		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
-		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		rte_mov16((uint8_t *)dst - 16 + n,
+				(const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 48) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst + 16, (const uint8_t *)src + 16);
+		rte_mov16((uint8_t *)dst - 16 + n,
+				(const uint8_t *)src - 16 + n);
 		return ret;
 	}
 	if (n <= 64) {
 		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
-		rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+		rte_mov32((uint8_t *)dst - 32 + n,
+				(const uint8_t *)src - 32 + n);
 		return ret;
 	}
-	if (n <= 512) {
-		if (n >= 256) {
-			n -= 256;
-			rte_mov256((uint8_t *)dst, (const uint8_t *)src);
-			src = (const uint8_t *)src + 256;
-			dst = (uint8_t *)dst + 256;
-		}
+	if (n <= 256) {
 		if (n >= 128) {
 			n -= 128;
 			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
 			src = (const uint8_t *)src + 128;
 			dst = (uint8_t *)dst + 128;
 		}
+COPY_BLOCK_128_BACK31:
 		if (n >= 64) {
 			n -= 64;
 			rte_mov64((uint8_t *)dst, (const uint8_t *)src);
 			src = (const uint8_t *)src + 64;
 			dst = (uint8_t *)dst + 64;
 		}
-COPY_BLOCK_64_BACK31:
 		if (n > 32) {
 			rte_mov32((uint8_t *)dst, (const uint8_t *)src);
-			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+			rte_mov32((uint8_t *)dst - 32 + n,
+					(const uint8_t *)src - 32 + n);
 			return ret;
 		}
 		if (n > 0) {
-			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+			rte_mov32((uint8_t *)dst - 32 + n,
+					(const uint8_t *)src - 32 + n);
 		}
 		return ret;
 	}
 
 	/**
-	 * Make store aligned when copy size exceeds 512 bytes
+	 * Make store aligned when copy size exceeds 256 bytes
 	 */
 	dstofss = (uintptr_t)dst & 0x1F;
 	if (dstofss > 0) {
@@ -522,35 +482,19 @@ COPY_BLOCK_64_BACK31:
 	}
 
 	/**
-	 * Copy 256-byte blocks.
-	 * Use copy block function for better instruction order control,
-	 * which is important when load is unaligned.
+	 * Copy 128-byte blocks
 	 */
-	rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n);
+	rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
 	bits = n;
-	n = n & 255;
+	n = n & 127;
 	bits -= n;
 	src = (const uint8_t *)src + bits;
 	dst = (uint8_t *)dst + bits;
 
 	/**
-	 * Copy 64-byte blocks.
-	 * Use copy block function for better instruction order control,
-	 * which is important when load is unaligned.
-	 */
-	if (n >= 64) {
-		rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n);
-		bits = n;
-		n = n & 63;
-		bits -= n;
-		src = (const uint8_t *)src + bits;
-		dst = (uint8_t *)dst + bits;
-	}
-
-	/**
 	 * Copy whatever left
 	 */
-	goto COPY_BLOCK_64_BACK31;
+	goto COPY_BLOCK_128_BACK31;
 }
 
 #else /* RTE_MACHINE_CPUFLAG */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
index d9356419..0649f794 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -50,11 +50,10 @@ void rte_xend(void)
 	 asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
 }
 
-static __attribute__((__always_inline__)) inline
-void rte_xabort(const unsigned int status)
-{
-	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
-}
+/* not an inline function to workaround a clang bug with -O0 */
+#define rte_xabort(status) do { \
+	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); \
+} while (0)
 
 static __attribute__((__always_inline__)) inline
 int rte_xtest(void)
diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h
index 03e84773..afb0afe4 100644
--- a/lib/librte_eal/common/include/generic/rte_memcpy.h
+++ b/lib/librte_eal/common/include/generic/rte_memcpy.h
@@ -134,11 +134,4 @@ rte_memcpy(void *dst, const void *src, size_t n);
 
 #endif /* __DOXYGEN__ */
 
-/*
- * memcpy() function used by rte_memcpy macro
- */
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
-
-
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index 94129fab..cab6fb4c 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -43,6 +43,9 @@
  * the implementation is architecture-specific.
  */
 
+#include "rte_log.h"
+#include "rte_branch_prediction.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -76,8 +79,13 @@ void rte_dump_registers(void);
 #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
 #define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
 
+#if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
+#define RTE_ASSERT(exp)	RTE_VERIFY(exp)
+#else
+#define RTE_ASSERT(exp) do {} while (0)
+#endif
 #define	RTE_VERIFY(exp)	do {                                                  \
-	if (!(exp))                                                           \
+	if (unlikely(!(exp)))                                                           \
 		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
 } while (0)
 
diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h
index 10dac2e0..88ad8e48 100644
--- a/lib/librte_eal/common/include/rte_keepalive.h
+++ b/lib/librte_eal/common/include/rte_keepalive.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
+ *   Copyright 2015-2016 Intel Shannon Ltd. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -48,17 +48,47 @@
 #define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
 #endif
 
+enum rte_keepalive_state {
+	RTE_KA_STATE_UNUSED = 0,
+	RTE_KA_STATE_ALIVE = 1,
+	RTE_KA_STATE_MISSING = 4,
+	RTE_KA_STATE_DEAD = 2,
+	RTE_KA_STATE_GONE = 3,
+	RTE_KA_STATE_DOZING = 5,
+	RTE_KA_STATE_SLEEP = 6
+};
+
 /**
  * Keepalive failure callback.
  *
  *  Receives a data pointer passed to rte_keepalive_create() and the id of the
  *  failed core.
+ *  @param data Data pointer passed to rte_keepalive_create()
+ *  @param id_core ID of the core that has failed
  */
 typedef void (*rte_keepalive_failure_callback_t)(
 	void *data,
 	const int id_core);
 
 /**
+ * Keepalive relay callback.
+ *
+ *  Receives a data pointer passed to rte_keepalive_register_relay_callback(),
+ *  the id of the core for which state is to be forwarded, and details of the
+ *  current core state.
+ *  @param data Data pointer passed to rte_keepalive_register_relay_callback()
+ *  @param id_core ID of the core for which state is being reported
+ *  @param core_state The current state of the core
+ *  @param Timestamp of when core was last seen alive
+ */
+typedef void (*rte_keepalive_relay_callback_t)(
+	void *data,
+	const int id_core,
+	enum rte_keepalive_state core_state,
+	uint64_t last_seen
+	);
+
+/**
  * Keepalive state structure.
  * @internal
  */
@@ -105,4 +135,35 @@ void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
 void
 rte_keepalive_mark_alive(struct rte_keepalive *keepcfg);
 
+/**
+ * Per-core sleep-time indication.
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ *
+ * If CPU idling is enabled, this function needs to be called from within
+ * the main process loop of the LCore going to sleep, in order to avoid
+ * the LCore being mis-detected as dead.
+ */
+void
+rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg);
+
+/**
+ * Registers a 'live core' callback.
+ *
+ * The complement of the 'dead core' callback. This is called when a
+ * core is known to be alive, and is intended for cases when an app
+ * needs to know 'liveness' beyond just knowing when a core has died.
+ *
+ * @param *keepcfg
+ *   Keepalive structure pointer
+ * @param callback
+ *   Function called upon detection of a dead core.
+ * @param data
+ *   Data pointer to be passed to function callback.
+ */
+void
+rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg,
+	rte_keepalive_relay_callback_t callback,
+	void *data);
+
 #endif /* _KEEPALIVE_H_ */
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index ac151302..fe7b5865 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -250,23 +250,16 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
 /**
  * Set thread names.
  *
- * Macro to wrap `pthread_setname_np()` with a glibc version check.
- * Only glibc >= 2.12 supports this feature.
+ * @note It fails with glibc < 2.12.
  *
- * This macro only used for Linux, BSD does direct libc call.
- * BSD libc version of function is `pthread_set_name_np()`.
+ * @param id
+ *   Thread id.
+ * @param name
+ *   Thread name to set.
+ * @return
+ *   On success, return 0; otherwise return a negative value.
  */
-#if defined(__DOXYGEN__)
-#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
-#endif
-
-#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
-#if __GLIBC_PREREQ(2, 12)
-#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__)
-#else
-#define rte_thread_setname(...) 0
-#endif
-#endif
+int rte_thread_setname(pthread_t id, const char *name);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index 2e47e7f6..b1add04c 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -42,6 +42,8 @@
  * This file provides a log API to RTE applications.
  */
 
+#include "rte_common.h" /* for __rte_deprecated macro */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -179,22 +181,27 @@ int rte_log_cur_msg_loglevel(void);
 int rte_log_cur_msg_logtype(void);
 
 /**
+ * @deprecated
  * Enable or disable the history (enabled by default)
  *
  * @param enable
  *   true to enable, or 0 to disable history.
  */
+__rte_deprecated
 void rte_log_set_history(int enable);
 
 /**
+ * @deprecated
  * Dump the log history to a file
  *
  * @param f
  *   A pointer to a file for output
  */
+__rte_deprecated
 void rte_log_dump_history(FILE *f);
 
 /**
+ * @deprecated
  * Add a log message to the history.
  *
  * This function can be called from a user-defined log stream. It adds
@@ -209,6 +216,7 @@ void rte_log_dump_history(FILE *f);
  *   - 0: Success.
  *   - (-ENOBUFS) if there is no room to store the message.
  */
+__rte_deprecated
 int rte_log_add_in_history(const char *buf, size_t size);
 
 /**
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index f8dbece0..06611093 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -200,21 +200,22 @@ unsigned rte_memory_get_nrank(void);
 int rte_xen_dom0_supported(void);
 
 /**< Internal use only - phys to virt mapping for xen */
-phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t);
+phys_addr_t rte_xen_mem_phy2mch(int32_t, const phys_addr_t);
 
 /**
  * Return the physical address of elt, which is an element of the pool mp.
  *
  * @param memseg_id
- *   The mempool is from which memory segment.
+ *   Identifier of the memory segment owning the physical address. If
+ *   set to -1, find it automatically.
  * @param phy_addr
  *   physical address of elt.
  *
  * @return
- *   The physical address or error.
+ *   The physical address or RTE_BAD_PHYS_ADDR on error.
  */
 static inline phys_addr_t
-rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+rte_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
 {
 	if (rte_xen_dom0_supported())
 		return rte_xen_mem_phy2mch(memseg_id, phy_addr);
@@ -250,7 +251,7 @@ static inline int rte_xen_dom0_supported(void)
 }
 
 static inline phys_addr_t
-rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
+rte_mem_phy2mch(int32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
 {
 	return phy_addr;
 }
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index e692094e..fa749626 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -91,7 +91,7 @@ extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers.
 extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */
 
 /** Pathname of PCI devices directory. */
-#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+const char *pci_get_sysfs_path(void);
 
 /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
 #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
@@ -105,9 +105,6 @@ extern struct pci_device_list pci_device_list; /**< Global list of PCI devices.
 /** Nb. of values in PCI resource format. */
 #define PCI_RESOURCE_FMT_NVAL 3
 
-/** IO resource type: memory address space */
-#define IORESOURCE_MEM        0x00000200
-
 /**
  * A structure describing a PCI resource.
  */
@@ -125,6 +122,7 @@ struct rte_pci_resource {
  * table of these IDs for each device that it supports.
  */
 struct rte_pci_id {
+	uint32_t class_id;            /**< Class ID (class, subclass, pi) or RTE_CLASS_ANY_ID. */
 	uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
 	uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
 	uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */
@@ -170,10 +168,12 @@ struct rte_pci_device {
 
 /** Any PCI device identifier (vendor, device, ...) */
 #define PCI_ANY_ID (0xffff)
+#define RTE_CLASS_ANY_ID (0xffffff)
 
 #ifdef __cplusplus
 /** C++ macro used to help building up tables of device IDs */
 #define RTE_PCI_DEVICE(vend, dev) \
+	RTE_CLASS_ANY_ID,         \
 	(vend),                   \
 	(dev),                    \
 	PCI_ANY_ID,               \
@@ -181,6 +181,7 @@ struct rte_pci_device {
 #else
 /** Macro used to help building up tables of device IDs */
 #define RTE_PCI_DEVICE(vend, dev)          \
+	.class_id = RTE_CLASS_ANY_ID,      \
 	.vendor_id = (vend),               \
 	.device_id = (dev),                \
 	.subsystem_vendor_id = PCI_ANY_ID, \
@@ -213,8 +214,6 @@ struct rte_pci_driver {
 
 /** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
-/** Device driver must be registered several times until failure - deprecated */
-#pragma GCC poison RTE_PCI_DRV_MULTIPLE
 /** Device needs to be unbound even if no module is provided */
 #define RTE_PCI_DRV_FORCE_UNBIND 0x0004
 /** Device driver supports link state interrupt */
@@ -520,15 +519,17 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device,
 struct rte_pci_ioport {
 	struct rte_pci_device *dev;
 	uint64_t base;
+	uint64_t len; /* only filled for memory mapped ports */
 };
 
 /**
- * Initialises a rte_pci_ioport object for a pci device io resource.
+ * Initialize a rte_pci_ioport object for a pci device io resource.
+ *
  * This object is then used to gain access to those io resources (see below).
  *
  * @param dev
- *   A pointer to a rte_pci_device structure describing the device.
- *   to use
+ *   A pointer to a rte_pci_device structure describing the device
+ *   to use.
  * @param bar
  *   Index of the io pci resource we want to access.
  * @param p
@@ -544,6 +545,8 @@ int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
  *
  * @param p
  *   The rte_pci_ioport object to be uninitialized.
+ * @return
+ *  0 on success, negative on error.
  */
 int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p);
 
@@ -577,20 +580,6 @@ void rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
 void rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
 			      const void *data, size_t len, off_t offset);
 
-#ifdef RTE_PCI_CONFIG
-#include <rte_common.h>
-/**
- * Set special config space registers for performance purpose.
- * It is deprecated, as all configurations have been moved into
- * each PMDs respectively.
- *
- * @param dev
- *   A pointer to a rte_pci_device structure describing the device
- *   to use
- */
-void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated;
-#endif /* RTE_PCI_CONFIG */
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h
index cf7b5487..af39fbbd 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_ids.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h
@@ -63,11 +63,12 @@
  * This file contains a list of the PCI device IDs recognised by DPDK, which
  * can be used to fill out an array of structures describing the devices.
  *
- * Currently four families of devices are recognised: those supported by the
- * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio
- * driver which is a para virtualization driver running in guest virtual machine.
- * The inclusion of these in an array built using this file depends on the
- * definition of
+ * Currently five families of devices are recognised: those supported by the
+ * IGB driver, by EM driver, those supported by the IXGBE driver, those
+ * supported by the BNXT driver, and by virtio driver which is a para
+ * virtualization driver running in guest virtual machine. The inclusion of
+ * these in an array built using this file depends on the definition of
+ * RTE_PCI_DEV_ID_DECL_BNXT
  * RTE_PCI_DEV_ID_DECL_EM
  * RTE_PCI_DEV_ID_DECL_IGB
  * RTE_PCI_DEV_ID_DECL_IGBVF
@@ -152,6 +153,10 @@
 #define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev)
 #endif
 
+#ifndef RTE_PCI_DEV_ID_DECL_BNXT
+#define RTE_PCI_DEV_ID_DECL_BNXT(vend, dev)
+#endif
+
 #ifndef PCI_VENDOR_ID_INTEL
 /** Vendor ID used by Intel devices */
 #define PCI_VENDOR_ID_INTEL 0x8086
@@ -446,12 +451,14 @@ RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
 #define IXGBE_DEV_ID_X550EM_A_KR                0x15C2
 #define IXGBE_DEV_ID_X550EM_A_KR_L              0x15C3
 #define IXGBE_DEV_ID_X550EM_A_SFP_N             0x15C4
-#define IXGBE_DEV_ID_X550EM_A_1G_T              0x15C6
-#define IXGBE_DEV_ID_X550EM_A_1G_T_L            0x15C7
+#define IXGBE_DEV_ID_X550EM_A_SGMII             0x15C6
+#define IXGBE_DEV_ID_X550EM_A_SGMII_L           0x15C7
 #define IXGBE_DEV_ID_X550EM_A_10G_T             0x15C8
 #define IXGBE_DEV_ID_X550EM_A_QSFP              0x15CA
 #define IXGBE_DEV_ID_X550EM_A_QSFP_N            0x15CC
 #define IXGBE_DEV_ID_X550EM_A_SFP               0x15CE
+#define IXGBE_DEV_ID_X550EM_A_1G_T              0x15E4
+#define IXGBE_DEV_ID_X550EM_A_1G_T_L            0x15E5
 #define IXGBE_DEV_ID_X550EM_X_KX4               0x15AA
 #define IXGBE_DEV_ID_X550EM_X_KR                0x15AB
 
@@ -506,12 +513,14 @@ RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
+RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
 RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
 
@@ -532,12 +541,16 @@ RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
 #define I40E_DEV_ID_20G_KR2             0x1587
 #define I40E_DEV_ID_20G_KR2_A           0x1588
 #define I40E_DEV_ID_10G_BASE_T4         0x1589
+#define I40E_DEV_ID_25G_B               0x158A
+#define I40E_DEV_ID_25G_SFP28           0x158B
 #define I40E_DEV_ID_X722_A0             0x374C
 #define I40E_DEV_ID_KX_X722             0x37CE
 #define I40E_DEV_ID_QSFP_X722           0x37CF
 #define I40E_DEV_ID_SFP_X722            0x37D0
 #define I40E_DEV_ID_1G_BASE_T_X722      0x37D1
 #define I40E_DEV_ID_10G_BASE_T_X722     0x37D2
+#define I40E_DEV_ID_SFP_I_X722          0x37D3
+#define I40E_DEV_ID_QSFP_I_X722         0x37D4
 
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU)
@@ -550,12 +563,16 @@ RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_25G_B)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_25G_SFP28)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722)
 RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_I_X722)
+RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_I_X722)
 
 /*************** Physical FM10K devices from fm10k_type.h ***************/
 
@@ -686,6 +703,30 @@ RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF)
 RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF)
 #endif
 
+/****************** Broadcom bnxt devices ******************/
+
+#define BROADCOM_DEV_ID_57301                  0x16c8
+#define BROADCOM_DEV_ID_57302                  0x16c9
+#define BROADCOM_DEV_ID_57304_PF               0x16ca
+#define BROADCOM_DEV_ID_57304_VF               0x16cb
+#define BROADCOM_DEV_ID_57402                  0x16d0
+#define BROADCOM_DEV_ID_57404                  0x16d1
+#define BROADCOM_DEV_ID_57406_PF               0x16d2
+#define BROADCOM_DEV_ID_57406_VF               0x16d3
+#define BROADCOM_DEV_ID_57406_MF               0x16d4
+#define BROADCOM_DEV_ID_57314                  0x16df
+
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57301)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57302)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57304_PF)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57304_VF)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57402)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57404)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_PF)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_VF)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57406_MF)
+RTE_PCI_DEV_ID_DECL_BNXT(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57314)
+
 /*
  * Undef all RTE_PCI_DEV_ID_DECL_* here.
  */
@@ -702,3 +743,4 @@ RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF)
 #undef RTE_PCI_DEV_ID_DECL_VMXNET3
 #undef RTE_PCI_DEV_ID_DECL_FM10K
 #undef RTE_PCI_DEV_ID_DECL_FM10KVF
+#undef RTE_PCI_DEV_ID_DECL_BNXT
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index f8ea6d73..dbe09975 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -60,7 +60,7 @@ extern "C" {
 /**
  * Minor version/month number i.e. the mm in yy.mm.z
  */
-#define RTE_VER_MONTH 4
+#define RTE_VER_MONTH 7
 
 /**
  * Patch level number i.e. the z in yy.mm.z
@@ -70,14 +70,14 @@ extern "C" {
 /**
  * Extra string to be appended to version number
  */
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
 
 /**
  * Patch release number
  *   0-15 = release candidates
  *   16   = release
  */
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 1
 
 /**
  * Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
index 23363ec1..9765d1bd 100644
--- a/lib/librte_eal/common/rte_keepalive.c
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -42,12 +42,8 @@
 
 struct rte_keepalive {
 	/** Core Liveness. */
-	enum rte_keepalive_state {
-		ALIVE = 1,
-		MISSING = 0,
-		DEAD = 2,
-		GONE = 3
-	} __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];
+	enum rte_keepalive_state __rte_cache_aligned state_flags[
+		RTE_KEEPALIVE_MAXCORES];
 
 	/** Last-seen-alive timestamps */
 	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
@@ -68,6 +64,15 @@ struct rte_keepalive {
 	void *callback_data;
 	uint64_t tsc_initial;
 	uint64_t tsc_mhz;
+
+	/** Core state relay handler. */
+	rte_keepalive_relay_callback_t relay_callback;
+
+	/**
+	 * Core state relay handler app data.
+	 * Pointer is passed to live core handler.
+	 */
+	void *relay_callback_data;
 };
 
 static void
@@ -92,16 +97,18 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
 			continue;
 
 		switch (keepcfg->state_flags[idx_core]) {
-		case ALIVE: /* Alive */
-			keepcfg->state_flags[idx_core] = MISSING;
+		case RTE_KA_STATE_UNUSED:
+			break;
+		case RTE_KA_STATE_ALIVE: /* Alive */
+			keepcfg->state_flags[idx_core] = RTE_KA_STATE_MISSING;
 			keepcfg->last_alive[idx_core] = rte_rdtsc();
 			break;
-		case MISSING: /* MIA */
+		case RTE_KA_STATE_MISSING: /* MIA */
 			print_trace("Core MIA. ", keepcfg, idx_core);
-			keepcfg->state_flags[idx_core] = DEAD;
+			keepcfg->state_flags[idx_core] = RTE_KA_STATE_DEAD;
 			break;
-		case DEAD: /* Dead */
-			keepcfg->state_flags[idx_core] = GONE;
+		case RTE_KA_STATE_DEAD: /* Dead */
+			keepcfg->state_flags[idx_core] = RTE_KA_STATE_GONE;
 			print_trace("Core died. ", keepcfg, idx_core);
 			if (keepcfg->callback)
 				keepcfg->callback(
@@ -109,9 +116,22 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
 					idx_core
 					);
 			break;
-		case GONE: /* Buried */
+		case RTE_KA_STATE_GONE: /* Buried */
+			break;
+		case RTE_KA_STATE_DOZING: /* Core going idle */
+			keepcfg->state_flags[idx_core] = RTE_KA_STATE_SLEEP;
+			keepcfg->last_alive[idx_core] = rte_rdtsc();
+			break;
+		case RTE_KA_STATE_SLEEP: /* Idled core */
 			break;
 		}
+		if (keepcfg->relay_callback)
+			keepcfg->relay_callback(
+				keepcfg->relay_callback_data,
+				idx_core,
+				keepcfg->state_flags[idx_core],
+				keepcfg->last_alive[idx_core]
+				);
 	}
 }
 
@@ -133,11 +153,19 @@ rte_keepalive_create(rte_keepalive_failure_callback_t callback,
 	return keepcfg;
 }
 
+void rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg,
+	rte_keepalive_relay_callback_t callback,
+	void *data)
+{
+	keepcfg->relay_callback = callback;
+	keepcfg->relay_callback_data = data;
+}
+
 void
 rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
 {
 	if (id_core < RTE_KEEPALIVE_MAXCORES) {
-		keepcfg->active_cores[id_core] = 1;
+		keepcfg->active_cores[id_core] = RTE_KA_STATE_ALIVE;
 		keepcfg->last_alive[id_core] = rte_rdtsc();
 	}
 }
@@ -145,5 +173,11 @@ rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
 void
 rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
 {
-	keepcfg->state_flags[rte_lcore_id()] = ALIVE;
+	keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_ALIVE;
+}
+
+void
+rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg)
+{
+	keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_DOZING;
 }
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index e1093619..30b30f33 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -44,9 +44,12 @@ VPATH += $(RTE_SDK)/lib/librte_eal/common
 CFLAGS += -I$(SRCDIR)/include
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal
 CFLAGS += -I$(RTE_SDK)/lib/librte_ring
 CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
 CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
+endif
 CFLAGS += $(WERROR_FLAGS) -O3
 
 LDLIBS += -ldl
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8aafd519..543ef869 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -465,24 +465,6 @@ eal_parse_vfio_intr(const char *mode)
 	return -1;
 }
 
-static inline size_t
-eal_get_hugepage_mem_size(void)
-{
-	uint64_t size = 0;
-	unsigned i, j;
-
-	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
-		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-		if (hpi->hugedir != NULL) {
-			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
-				size += hpi->hugepage_sz * hpi->num_pages[j];
-			}
-		}
-	}
-
-	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
-}
-
 /* Parse the arguments for --log-level only */
 static void
 eal_log_level_parse(int argc, char **argv)
@@ -715,12 +697,8 @@ rte_eal_iopl_init(void)
 #if defined(RTE_ARCH_X86)
 	if (iopl(3) != 0)
 		return -1;
-	return 0;
-#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
-	return 0; /* iopl syscall not supported for ARM/ARM64 */
-#else
-	return -1;
 #endif
+	return 0;
 }
 
 /* Launch threads, called at application init(). */
@@ -766,8 +744,6 @@ rte_eal_init(int argc, char **argv)
 	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
 		if (internal_config.no_hugetlbfs)
 			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
-		else
-			internal_config.memory = eal_get_hugepage_mem_size();
 	}
 
 	if (internal_config.vmware_tsc_map == 1) {
@@ -863,7 +839,7 @@ rte_eal_init(int argc, char **argv)
 		ret = rte_thread_setname(lcore_config[i].thread_id,
 						thread_name);
 		if (ret != 0)
-			RTE_LOG(ERR, EAL,
+			RTE_LOG(DEBUG, EAL,
 				"Cannot set name for lcore thread\n");
 	}
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c
index 907fbfa7..5fbc17c5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_debug.c
+++ b/lib/librte_eal/linuxapp/eal/eal_debug.c
@@ -77,9 +77,6 @@ void __rte_panic(const char *funcname, const char *format, ...)
 {
 	va_list ap;
 
-	/* disable history */
-	rte_log_set_history(0);
-
 	rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname);
 	va_start(ap, format);
 	rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap);
@@ -98,9 +95,6 @@ rte_exit(int exit_code, const char *format, ...)
 {
 	va_list ap;
 
-	/* disable history */
-	rte_log_set_history(0);
-
 	if (exit_code != 0)
 		RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n"
 				"  Cause: ", exit_code);
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 06b26a9e..47a3b20a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -57,10 +57,8 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_debug.h>
 #include <rte_log.h>
-#include <rte_mempool.h>
 #include <rte_pci.h>
 #include <rte_malloc.h>
 #include <rte_errno.h>
@@ -889,7 +887,7 @@ rte_eal_intr_init(void)
 			"eal-intr-thread");
 		ret_1 = rte_thread_setname(intr_thread, thread_name);
 		if (ret_1 != 0)
-			RTE_LOG(ERR, EAL,
+			RTE_LOG(DEBUG, EAL,
 			"Failed to set thread name for interrupt handling\n");
 	}
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
index 07aec694..67b3caf2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
+++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
@@ -49,7 +49,6 @@
 #include <rte_string_fns.h>
 #include <rte_errno.h>
 #include <rte_ring.h>
-#include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_common.h>
 #include <rte_ivshmem.h>
@@ -184,21 +183,21 @@ overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
 	i_end2 = mz2->ioremap_addr + mz2->len;
 
 	/* check for overlap in virtual addresses */
-	if (start1 > start2 && start1 < end2)
+	if (start1 >= start2 && start1 < end2)
 		result |= VIRT;
 	if (start2 >= start1 && start2 < end1)
 		result |= VIRT;
 
 	/* check for overlap in physical addresses */
-	if (p_start1 > p_start2 && p_start1 < p_end2)
+	if (p_start1 >= p_start2 && p_start1 < p_end2)
 		result |= PHYS;
-	if (p_start2 > p_start1 && p_start2 < p_end1)
+	if (p_start2 >= p_start1 && p_start2 < p_end1)
 		result |= PHYS;
 
 	/* check for overlap in ioremap addresses */
-	if (i_start1 > i_start2 && i_start1 < i_end2)
+	if (i_start1 >= i_start2 && i_start1 < i_end2)
 		result |= IOREMAP;
-	if (i_start2 > i_start1 && i_start2 < i_end1)
+	if (i_start2 >= i_start1 && i_start2 < i_end1)
 		result |= IOREMAP;
 
 	return result;
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index 0b133c3e..d3911004 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -50,8 +50,7 @@
 #include "eal_private.h"
 
 /*
- * default log function, used once mempool (hence log history) is
- * available
+ * default log function
  */
 static ssize_t
 console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
@@ -60,9 +59,6 @@ console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
 	ssize_t ret;
 	uint32_t loglevel;
 
-	/* add this log in history */
-	rte_log_add_in_history(buf, size);
-
 	/* write on stdout */
 	ret = fwrite(buf, 1, size, stdout);
 	fflush(stdout);
@@ -110,8 +106,7 @@ rte_eal_log_init(const char *id, int facility)
 /* early logs */
 
 /*
- * early log function, used during boot when mempool (hence log
- * history) is not available
+ * early log function, used before rte_eal_log_init
  */
 static ssize_t
 early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 5b9132c6..5578c254 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -80,6 +80,8 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
+#include <signal.h>
+#include <setjmp.h>
 
 #include <rte_log.h>
 #include <rte_memory.h>
@@ -309,6 +311,22 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 	return addr;
 }
 
+static sigjmp_buf huge_jmpenv;
+
+static void huge_sigbus_handler(int signo __rte_unused)
+{
+	siglongjmp(huge_jmpenv, 1);
+}
+
+/* Put setjmp into a wrap method to avoid compiling error. Any non-volatile,
+ * non-static local variable in the stack frame calling sigsetjmp might be
+ * clobbered by a call to longjmp.
+ */
+static int huge_wrap_sigsetjmp(void)
+{
+	return sigsetjmp(huge_jmpenv, 1);
+}
+
 /*
  * Mmap all hugepages of hugepage table: it first open a file in
  * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
@@ -316,7 +334,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
  * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
  * map continguous physical blocks in contiguous virtual blocks.
  */
-static int
+static unsigned
 map_all_hugepages(struct hugepage_file *hugepg_tbl,
 		struct hugepage_info *hpi, int orig)
 {
@@ -394,9 +412,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 		/* try to create hugepage file */
 		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
 		if (fd < 0) {
-			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
+			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
 					strerror(errno));
-			return -1;
+			return i;
 		}
 
 		/* map the segment, and populate page tables,
@@ -404,10 +422,10 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 		virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE,
 				MAP_SHARED | MAP_POPULATE, fd, 0);
 		if (virtaddr == MAP_FAILED) {
-			RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__,
+			RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
 					strerror(errno));
 			close(fd);
-			return -1;
+			return i;
 		}
 
 		if (orig) {
@@ -417,12 +435,33 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 			hugepg_tbl[i].final_va = virtaddr;
 		}
 
+		if (orig) {
+			/* In linux, hugetlb limitations, like cgroup, are
+			 * enforced at fault time instead of mmap(), even
+			 * with the option of MAP_POPULATE. Kernel will send
+			 * a SIGBUS signal. To avoid to be killed, save stack
+			 * environment here, if SIGBUS happens, we can jump
+			 * back here.
+			 */
+			if (huge_wrap_sigsetjmp()) {
+				RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
+					"hugepages of size %u MB\n",
+					(unsigned)(hugepage_sz / 0x100000));
+				munmap(virtaddr, hugepage_sz);
+				close(fd);
+				unlink(hugepg_tbl[i].filepath);
+				return i;
+			}
+			*(int *)virtaddr = 0;
+		}
+
+
 		/* set shared flock on the file. */
 		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
+			RTE_LOG(DEBUG, EAL, "%s(): Locking file failed:%s \n",
 				__func__, strerror(errno));
 			close(fd);
-			return -1;
+			return i;
 		}
 
 		close(fd);
@@ -430,7 +469,8 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 		vma_addr = (char *)vma_addr + hugepage_sz;
 		vma_len -= hugepage_sz;
 	}
-	return 0;
+
+	return i;
 }
 
 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
@@ -1036,6 +1076,51 @@ calc_num_pages_per_socket(uint64_t * memory,
 	return total_num_pages;
 }
 
+static inline size_t
+eal_get_hugepage_mem_size(void)
+{
+	uint64_t size = 0;
+	unsigned i, j;
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+		if (hpi->hugedir != NULL) {
+			for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
+				size += hpi->hugepage_sz * hpi->num_pages[j];
+			}
+		}
+	}
+
+	return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
+}
+
+static struct sigaction huge_action_old;
+static int huge_need_recover;
+
+static void
+huge_register_sigbus(void)
+{
+	sigset_t mask;
+	struct sigaction action;
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGBUS);
+	action.sa_flags = 0;
+	action.sa_mask = mask;
+	action.sa_handler = huge_sigbus_handler;
+
+	huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
+}
+
+static void
+huge_recover_sigbus(void)
+{
+	if (huge_need_recover) {
+		sigaction(SIGBUS, &huge_action_old, NULL);
+		huge_need_recover = 0;
+	}
+}
+
 /*
  * Prepare physical memory mapping: fill configuration structure with
  * these infos, return 0 on success.
@@ -1122,8 +1207,11 @@ rte_eal_hugepage_init(void)
 
 	hp_offset = 0; /* where we start the current page size entries */
 
+	huge_register_sigbus();
+
 	/* map all hugepages and sort them */
 	for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){
+		unsigned pages_old, pages_new;
 		struct hugepage_info *hpi;
 
 		/*
@@ -1137,10 +1225,28 @@ rte_eal_hugepage_init(void)
 			continue;
 
 		/* map all hugepages available */
-		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){
-			RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n",
-					(unsigned)(hpi->hugepage_sz / 0x100000));
+		pages_old = hpi->num_pages[0];
+		pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);
+		if (pages_new < pages_old) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+			RTE_LOG(ERR, EAL,
+				"%d not %d hugepages of size %u MB allocated\n",
+				pages_new, pages_old,
+				(unsigned)(hpi->hugepage_sz / 0x100000));
 			goto fail;
+#else
+			RTE_LOG(DEBUG, EAL,
+				"%d not %d hugepages of size %u MB allocated\n",
+				pages_new, pages_old,
+				(unsigned)(hpi->hugepage_sz / 0x100000));
+
+			int pages = pages_old - pages_new;
+
+			nr_hugepages -= pages;
+			hpi->num_pages[0] = pages_new;
+			if (pages_new == 0)
+				continue;
+#endif
 		}
 
 		/* find physical addresses and sockets for each hugepage */
@@ -1172,8 +1278,9 @@ rte_eal_hugepage_init(void)
 		hp_offset += new_pages_count[i];
 #else
 		/* remap all hugepages */
-		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
-			RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=
+		    hpi->num_pages[0]) {
+			RTE_LOG(ERR, EAL, "Failed to remap %u MB pages\n",
 					(unsigned)(hpi->hugepage_sz / 0x100000));
 			goto fail;
 		}
@@ -1187,6 +1294,11 @@ rte_eal_hugepage_init(void)
 #endif
 	}
 
+	huge_recover_sigbus();
+
+	if (internal_config.memory == 0 && internal_config.force_sockets == 0)
+		internal_config.memory = eal_get_hugepage_mem_size();
+
 #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
 	nr_hugefiles = 0;
 	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
@@ -1373,6 +1485,7 @@ rte_eal_hugepage_init(void)
 	return 0;
 
 fail:
+	huge_recover_sigbus();
 	free(tmp_hp);
 	return -1;
 }
@@ -1399,7 +1512,7 @@ int
 rte_eal_hugepage_attach(void)
 {
 	const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	const struct hugepage_file *hp = NULL;
+	struct hugepage_file *hp = NULL;
 	unsigned num_hp = 0;
 	unsigned i, s = 0; /* s used to track the segment number */
 	off_t size;
@@ -1417,7 +1530,7 @@ rte_eal_hugepage_attach(void)
 	if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
 		if (rte_xen_dom0_memory_attach() < 0) {
-			RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay "
+			RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
 					"process\n");
 			return -1;
 		}
@@ -1481,7 +1594,7 @@ rte_eal_hugepage_attach(void)
 
 	size = getFileSize(fd_hugepage);
 	hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
-	if (hp == NULL) {
+	if (hp == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
 		goto error;
 	}
@@ -1545,12 +1658,19 @@ rte_eal_hugepage_attach(void)
 		s++;
 	}
 	/* unmap the hugepage config file, since we are done using it */
-	munmap((void *)(uintptr_t)hp, size);
+	munmap(hp, size);
 	close(fd_zero);
 	close(fd_hugepage);
 	return 0;
 
 error:
+	s = 0;
+	while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) {
+		munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len);
+		s++;
+	}
+	if (hp != NULL && hp != MAP_FAILED)
+		munmap(hp, size);
 	if (fd_zero >= 0)
 		close(fd_zero);
 	if (fd_hugepage >= 0)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index dbf12a84..f9c3efd2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -66,8 +66,8 @@ pci_unbind_kernel_driver(struct rte_pci_device *dev)
 
 	/* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
 	snprintf(filename, sizeof(filename),
-	         SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
-	         loc->domain, loc->bus, loc->devid, loc->function);
+		"%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(),
+		loc->domain, loc->bus, loc->devid, loc->function);
 
 	f = fopen(filename, "w");
 	if (f == NULL) /* device was not bound */
@@ -190,12 +190,13 @@ pci_find_max_end_va(void)
 	return RTE_PTR_ADD(last->addr, last->len);
 }
 
-/* parse the "resource" sysfs file */
-static int
-pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
+/* parse one line of the "resource" sysfs file (note that the 'line'
+ * string is modified)
+ */
+int
+pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
+	uint64_t *end_addr, uint64_t *flags)
 {
-	FILE *f;
-	char buf[BUFSIZ];
 	union pci_resource_info {
 		struct {
 			char *phys_addr;
@@ -204,6 +205,31 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
 		};
 		char *ptrs[PCI_RESOURCE_FMT_NVAL];
 	} res_info;
+
+	if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) {
+		RTE_LOG(ERR, EAL,
+			"%s(): bad resource format\n", __func__);
+		return -1;
+	}
+	errno = 0;
+	*phys_addr = strtoull(res_info.phys_addr, NULL, 16);
+	*end_addr = strtoull(res_info.end_addr, NULL, 16);
+	*flags = strtoull(res_info.flags, NULL, 16);
+	if (errno != 0) {
+		RTE_LOG(ERR, EAL,
+			"%s(): bad resource format\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* parse the "resource" sysfs file */
+static int
+pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
+{
+	FILE *f;
+	char buf[BUFSIZ];
 	int i;
 	uint64_t phys_addr, end_addr, flags;
 
@@ -220,21 +246,9 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
 				"%s(): cannot read resource\n", __func__);
 			goto error;
 		}
-
-		if (rte_strsplit(buf, sizeof(buf), res_info.ptrs, 3, ' ') != 3) {
-			RTE_LOG(ERR, EAL,
-				"%s(): bad resource format\n", __func__);
+		if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+				&end_addr, &flags) < 0)
 			goto error;
-		}
-		errno = 0;
-		phys_addr = strtoull(res_info.phys_addr, NULL, 16);
-		end_addr = strtoull(res_info.end_addr, NULL, 16);
-		flags = strtoull(res_info.flags, NULL, 16);
-		if (errno != 0) {
-			RTE_LOG(ERR, EAL,
-				"%s(): bad resource format\n", __func__);
-			goto error;
-		}
 
 		if (flags & IORESOURCE_MEM) {
 			dev->mem_resource[i].phys_addr = phys_addr;
@@ -306,6 +320,16 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 	}
 	dev->id.subsystem_device_id = (uint16_t)tmp;
 
+	/* get class_id */
+	snprintf(filename, sizeof(filename), "%s/class",
+		 dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		free(dev);
+		return -1;
+	}
+	/* the least 24 bits are valid: class, subclass, program interface */
+	dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;
+
 	/* get max_vfs */
 	dev->max_vfs = 0;
 	snprintf(filename, sizeof(filename), "%s/max_vfs", dirname);
@@ -453,7 +477,7 @@ rte_eal_pci_scan(void)
 	uint16_t domain;
 	uint8_t bus, devid, function;
 
-	dir = opendir(SYSFS_PCI_DEVICES);
+	dir = opendir(pci_get_sysfs_path());
 	if (dir == NULL) {
 		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
 			__func__, strerror(errno));
@@ -468,8 +492,8 @@ rte_eal_pci_scan(void)
 				&bus, &devid, &function) != 0)
 			continue;
 
-		snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES,
-			 e->d_name);
+		snprintf(dirname, sizeof(dirname), "%s/%s",
+				pci_get_sysfs_path(), e->d_name);
 		if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
 			goto error;
 	}
@@ -481,18 +505,6 @@ error:
 	return -1;
 }
 
-#ifdef RTE_PCI_CONFIG
-/*
- * It is deprecated, all its configurations have been moved into
- * each PMD respectively.
- */
-void
-pci_config_space_set(__rte_unused struct rte_pci_device *dev)
-{
-	RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n");
-}
-#endif
-
 /* Read PCI config space. */
 int rte_eal_pci_read_config(const struct rte_pci_device *device,
 			    void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 7011753d..f72a2548 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -36,12 +36,22 @@
 
 #include "eal_vfio.h"
 
+/** IO resource type: */
+#define IORESOURCE_IO         0x00000100
+#define IORESOURCE_MEM        0x00000200
+
 /*
  * Helper function to map PCI resources right after hugepages in virtual memory
  */
 extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
+/* parse one line of the "resource" sysfs file (note that the 'line'
+ * string is modified)
+ */
+int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
+	uint64_t *end_addr, uint64_t *flags);
+
 int pci_uio_alloc_resource(struct rte_pci_device *dev,
 		struct mapped_pci_resource **uio_res);
 void pci_uio_free_resource(struct rte_pci_device *dev,
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 068694dc..1786b754 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -35,6 +35,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <dirent.h>
+#include <inttypes.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <linux/pci_regs.h>
@@ -161,14 +162,14 @@ pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
 	 * or uio:uioX */
 
 	snprintf(dirname, sizeof(dirname),
-			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
+			"%s/" PCI_PRI_FMT "/uio", pci_get_sysfs_path(),
 			loc->domain, loc->bus, loc->devid, loc->function);
 
 	dir = opendir(dirname);
 	if (dir == NULL) {
 		/* retry with the parent directory */
 		snprintf(dirname, sizeof(dirname),
-				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
+				"%s/" PCI_PRI_FMT, pci_get_sysfs_path(),
 				loc->domain, loc->bus, loc->devid, loc->function);
 		dir = opendir(dirname);
 
@@ -309,7 +310,7 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 		struct mapped_pci_resource *uio_res, int map_idx)
 {
 	int fd;
-	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	char devname[PATH_MAX];
 	void *mapaddr;
 	struct rte_pci_addr *loc;
 	struct pci_map *maps;
@@ -319,7 +320,8 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
 
 	/* update devname for mmap  */
 	snprintf(devname, sizeof(devname),
-			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d",
+			"%s/" PCI_PRI_FMT "/resource%d",
+			pci_get_sysfs_path(),
 			loc->domain, loc->bus, loc->devid,
 			loc->function, res_idx);
 
@@ -368,11 +370,11 @@ error:
 	return -1;
 }
 
+#if defined(RTE_ARCH_X86)
 int
 pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 		   struct rte_pci_ioport *p)
 {
-#if defined(RTE_ARCH_X86)
 	char dirname[PATH_MAX];
 	char filename[PATH_MAX];
 	int uio_num;
@@ -411,81 +413,154 @@ pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
 	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
 
 	p->base = start;
+	p->len = 0;
 	return 0;
+}
 #else
-	RTE_SET_USED(dev);
-	RTE_SET_USED(bar);
-	RTE_SET_USED(p);
+int
+pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
+		   struct rte_pci_ioport *p)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char filename[PATH_MAX];
+	uint64_t phys_addr, end_addr, flags;
+	int fd, i;
+	void *addr;
+
+	/* open and read addresses of the corresponding resource in sysfs */
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
+		pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
+			strerror(errno));
+		return -1;
+	}
+	for (i = 0; i < bar + 1; i++) {
+		if (fgets(buf, sizeof(buf), f) == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
+			goto error;
+		}
+	}
+	if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
+			&end_addr, &flags) < 0)
+		goto error;
+	if ((flags & IORESOURCE_IO) == 0) {
+		RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar);
+		goto error;
+	}
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d",
+		pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
+		dev->addr.devid, dev->addr.function, bar);
+
+	/* mmap the pci resource */
+	fd = open(filename, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+			strerror(errno));
+		goto error;
+	}
+	addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE,
+		MAP_SHARED, fd, 0);
+	close(fd);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n",
+			strerror(errno));
+		goto error;
+	}
+
+	/* strangely, the base address is mmap addr + phys_addr */
+	p->base = (uintptr_t)addr + phys_addr;
+	p->len = end_addr + 1;
+	RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base);
+	fclose(f);
+
+	return 0;
+
+error:
+	fclose(f);
 	return -1;
-#endif
 }
+#endif
 
 void
 pci_uio_ioport_read(struct rte_pci_ioport *p,
 		    void *data, size_t len, off_t offset)
 {
-#if defined(RTE_ARCH_X86)
 	uint8_t *d;
 	int size;
-	unsigned short reg = p->base + offset;
+	uintptr_t reg = p->base + offset;
 
 	for (d = data; len > 0; d += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
+#if defined(RTE_ARCH_X86)
 			*(uint32_t *)d = inl(reg);
+#else
+			*(uint32_t *)d = *(volatile uint32_t *)reg;
+#endif
 		} else if (len >= 2) {
 			size = 2;
+#if defined(RTE_ARCH_X86)
 			*(uint16_t *)d = inw(reg);
+#else
+			*(uint16_t *)d = *(volatile uint16_t *)reg;
+#endif
 		} else {
 			size = 1;
+#if defined(RTE_ARCH_X86)
 			*d = inb(reg);
-		}
-	}
 #else
-	RTE_SET_USED(p);
-	RTE_SET_USED(data);
-	RTE_SET_USED(len);
-	RTE_SET_USED(offset);
+			*d = *(volatile uint8_t *)reg;
 #endif
+		}
+	}
 }
 
 void
 pci_uio_ioport_write(struct rte_pci_ioport *p,
 		     const void *data, size_t len, off_t offset)
 {
-#if defined(RTE_ARCH_X86)
 	const uint8_t *s;
 	int size;
-	unsigned short reg = p->base + offset;
+	uintptr_t reg = p->base + offset;
 
 	for (s = data; len > 0; s += size, reg += size, len -= size) {
 		if (len >= 4) {
 			size = 4;
+#if defined(RTE_ARCH_X86)
 			outl_p(*(const uint32_t *)s, reg);
+#else
+			*(volatile uint32_t *)reg = *(const uint32_t *)s;
+#endif
 		} else if (len >= 2) {
 			size = 2;
+#if defined(RTE_ARCH_X86)
 			outw_p(*(const uint16_t *)s, reg);
+#else
+			*(volatile uint16_t *)reg = *(const uint16_t *)s;
+#endif
 		} else {
 			size = 1;
+#if defined(RTE_ARCH_X86)
 			outb_p(*s, reg);
-		}
-	}
 #else
-	RTE_SET_USED(p);
-	RTE_SET_USED(data);
-	RTE_SET_USED(len);
-	RTE_SET_USED(offset);
+			*(volatile uint8_t *)reg = *s;
 #endif
+		}
+	}
 }
 
 int
 pci_uio_ioport_unmap(struct rte_pci_ioport *p)
 {
-	RTE_SET_USED(p);
 #if defined(RTE_ARCH_X86)
+	RTE_SET_USED(p);
 	/* FIXME close intr fd ? */
 	return 0;
 #else
-	return -1;
+	return munmap((void *)(uintptr_t)p->base, p->len);
 #endif
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 10266f8f..f91b9242 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -602,7 +602,7 @@ pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no)
 
 	/* try to find out IOMMU group for this device */
 	snprintf(linkname, sizeof(linkname),
-			 SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr);
+			 "%s/%s/iommu_group", pci_get_sysfs_path(), pci_addr);
 
 	ret = readlink(linkname, filename, sizeof(filename));
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
index d9188fde..d54ded88 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
@@ -287,7 +287,10 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg)
 		struct linger l;
 		l.l_onoff = 1;
 		l.l_linger = 60;
-		setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
+
+		if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
+			RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
+					"on listen socket (%s)\n", strerror(errno));
 
 		ret = vfio_mp_sync_receive_request(conn_sock);
 
@@ -396,7 +399,7 @@ pci_vfio_mp_sync_setup(void)
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync");
 	ret = rte_thread_setname(socket_thread, thread_name);
 	if (ret)
-		RTE_LOG(ERR, EAL,
+		RTE_LOG(DEBUG, EAL,
 			"Failed to set thread name for secondary processes!\n");
 
 	return 0;
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index 18bd8e04..9f88530e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -197,3 +197,16 @@ int rte_sys_gettid(void)
 {
 	return (int)syscall(SYS_gettid);
 }
+
+int rte_thread_setname(pthread_t id, const char *name)
+{
+	int ret = -1;
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 12)
+	ret = pthread_setname_np(id, name);
+#endif
+#endif
+	RTE_SET_USED(id);
+	RTE_SET_USED(name);
+	return ret;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c
index f2abb7b6..afa32f5c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_timer.c
+++ b/lib/librte_eal/linuxapp/eal/eal_timer.c
@@ -222,8 +222,8 @@ rte_eal_hpet_init(int make_default)
 	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc");
 	ret = rte_thread_setname(msb_inc_thread_id, thread_name);
 	if (ret != 0)
-		RTE_LOG(ERR, EAL,
-			"ERROR: Cannot set HPET timer thread name!\n");
+		RTE_LOG(DEBUG, EAL,
+			"Cannot set HPET timer thread name!\n");
 
 	if (make_default)
 		eal_timer_source = EAL_TIMER_HPET;
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
index 495eef9e..0b612bb1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -156,13 +156,27 @@ get_xen_memory_size(void)
  * Based on physical address to caculate MFN in Xen Dom0.
  */
 phys_addr_t
-rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
 {
-	int mfn_id;
+	int mfn_id, i;
 	uint64_t mfn, mfn_offset;
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	struct rte_memseg *memseg = mcfg->memseg;
 
+	/* find the memory segment owning the physical address */
+	if (memseg_id == -1) {
+		for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+			if ((phy_addr >= memseg[i].phys_addr) &&
+					(phys_addr < memseg[i].phys_addr +
+						memseg[i].size)) {
+				memseg_id = i;
+				break;
+			}
+		}
+		if (memseg_id == -1)
+			return RTE_BAD_PHYS_ADDR;
+	}
+
 	mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
 
 	/*the MFN is contiguous in 2M */
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 7e5e5984..2acdfd9b 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -113,7 +113,9 @@ struct rte_kni_mbuf {
 	void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
 	char pad0[10];
 	uint16_t data_off;      /**< Start address of data in segment buffer. */
-	char pad1[4];
+	char pad1[2];
+	uint8_t nb_segs;        /**< Number of segments. */
+	char pad4[1];
 	uint64_t ol_flags;      /**< Offload features. */
 	char pad2[4];
 	uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 12503efa..05134673 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -154,3 +154,13 @@ DPDK_16.04 {
 	rte_eal_primary_proc_alive;
 
 } DPDK_2.2;
+
+DPDK_16.07 {
+	global:
+
+	pci_get_sysfs_path;
+	rte_keepalive_mark_sleep;
+	rte_keepalive_register_relay_callback;
+	rte_thread_setname;
+
+} DPDK_16.04;
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
index c1d45a66..0d781e48 100644
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -24,6 +24,15 @@
 #define   PCI_MSIX_ENTRY_CTRL_MASKBIT   1
 #endif
 
+/*
+ * for kernels < 2.6.38 and backported patch that moves MSI-X entry definition
+ * to pci_regs.h Those kernels has PCI_MSIX_ENTRY_SIZE defined but not
+ * PCI_MSIX_ENTRY_CTRL_MASKBIT
+ */
+#ifndef PCI_MSIX_ENTRY_CTRL_MASKBIT
+#define PCI_MSIX_ENTRY_CTRL_MASKBIT    1
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \
 	(!(defined(RHEL_RELEASE_CODE) && \
 	 RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9)))
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index 72b26923..45a5720e 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -81,62 +81,10 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
 	return err ? err : count;
 }
 
-#ifdef RTE_PCI_CONFIG
-static ssize_t
-show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	dev_info(dev, "Deprecated\n");
-
-	return 0;
-}
-
-static ssize_t
-store_extended_tag(struct device *dev,
-		   struct device_attribute *attr,
-		   const char *buf,
-		   size_t count)
-{
-	dev_info(dev, "Deprecated\n");
-
-	return 0;
-}
-
-static ssize_t
-show_max_read_request_size(struct device *dev,
-			   struct device_attribute *attr,
-			   char *buf)
-{
-	dev_info(dev, "Deprecated\n");
-
-	return 0;
-}
-
-static ssize_t
-store_max_read_request_size(struct device *dev,
-			    struct device_attribute *attr,
-			    const char *buf,
-			    size_t count)
-{
-	dev_info(dev, "Deprecated\n");
-
-	return 0;
-}
-#endif
-
 static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs);
-#ifdef RTE_PCI_CONFIG
-static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag,
-	store_extended_tag);
-static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR,
-	show_max_read_request_size, store_max_read_request_size);
-#endif
 
 static struct attribute *dev_attrs[] = {
 	&dev_attr_max_vfs.attr,
-#ifdef RTE_PCI_CONFIG
-	&dev_attr_extended_tag.attr,
-	&dev_attr_max_read_request_size.attr,
-#endif
 	NULL,
 };
 
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
index ac99d3f1..8cc6b61c 100644
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -47,7 +47,7 @@ MODULE_CFLAGS += -Wall -Werror
 ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu)
 MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .)
 UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
-	 | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1)
+	 | cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1)
 MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
 endif
 
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index cf100b67..647ba3ce 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -14,16 +14,27 @@
 
 #endif /* < 2.6.39 */
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 33)
+#define HAVE_SIMPLIFIED_PERNET_OPERATIONS
+#endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
 #define sk_sleep(s) (s)->sk_sleep
+#endif
 
-#endif /* < 2.6.35 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#define HAVE_CHANGE_CARRIER_CB
+#endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
 #define HAVE_IOV_ITER_MSGHDR
 #endif
 
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) )
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
 #define HAVE_KIOCB_MSG_PARAM
-#endif /* < 4.1.0 */
+#define HAVE_REBUILD_HEADER
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+#define HAVE_TRANS_START_HELPER
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
index df224702..140a2a47 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
@@ -3300,12 +3300,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
 	*data = E1000_READ_REG(hw, E1000_MPHY_DATA);
 
 	/* Disable access to mPHY if it was originally disabled */
-	if (locked)
+	if (locked) {
 		ready = e1000_is_mphy_ready(hw);
 		if (!ready)
 			return -E1000_ERR_PHY;
 		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
 				E1000_MPHY_DIS_ACCESS);
+	}
 
 	return E1000_SUCCESS;
 }
@@ -3365,12 +3366,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
 	E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
 
 	/* Disable access to mPHY if it was originally disabled */
-	if (locked)
+	if (locked) {
 		ready = e1000_is_mphy_ready(hw);
 		if (!ready)
 			return -E1000_ERR_PHY;
 		E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
 				E1000_MPHY_DIS_ACCESS);
+	}
 
 	return E1000_SUCCESS;
 }
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
index 017dfe16..c6f4130d 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
@@ -867,12 +867,13 @@ s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
 	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
 		/* Set KX4/KX/KR support according to speed requested */
 		autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
-		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
 			if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
 				autoc |= IXGBE_AUTOC_KX4_SUPP;
 			if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) &&
 			    (hw->phy.smart_speed_active == false))
 				autoc |= IXGBE_AUTOC_KR_SUPP;
+		}
 		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
 			autoc |= IXGBE_AUTOC_KX_SUPP;
 	} else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
index 8c1d2fe3..92fc9fc7 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
@@ -59,8 +59,6 @@
 #undef CONFIG_DCA_MODULE
 
 char ixgbe_driver_name[] = "ixgbe";
-static const char ixgbe_driver_string[] =
-			      "Intel(R) 10 Gigabit PCI Express Network Driver";
 #define DRV_HW_PERF
 
 #ifndef CONFIG_IXGBE_NAPI
@@ -79,8 +77,6 @@ static const char ixgbe_driver_string[] =
 #define DRV_VERSION	__stringify(MAJ) "." __stringify(MIN) "." \
 			__stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG
 const char ixgbe_driver_version[] = DRV_VERSION;
-static const char ixgbe_copyright[] =
-				"Copyright (c) 1999-2012 Intel Corporation.";
 
 /* ixgbe_pci_tbl - PCI Device ID Table
  *
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
index ae8133f3..59d15ca6 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/rwsem.h>
@@ -34,6 +35,8 @@
 #include <net/netns/generic.h>
 
 #include <exec-env/rte_kni_common.h>
+
+#include "compat.h"
 #include "kni_dev.h"
 
 MODULE_LICENSE("Dual BSD/GPL");
@@ -104,7 +107,7 @@ struct kni_net {
 
 static int __net_init kni_init_net(struct net *net)
 {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	struct kni_net *knet = net_generic(net, kni_net_id);
 #else
 	struct kni_net *knet;
@@ -115,7 +118,7 @@ static int __net_init kni_init_net(struct net *net)
 		ret = -ENOMEM;
 		return ret;
 	}
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 
 	/* Clear the bit of device in use */
 	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
@@ -123,7 +126,7 @@ static int __net_init kni_init_net(struct net *net)
 	init_rwsem(&knet->kni_list_lock);
 	INIT_LIST_HEAD(&knet->kni_list_head);
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	return 0;
 #else
 	ret = net_assign_generic(net, kni_net_id, knet);
@@ -131,25 +134,25 @@ static int __net_init kni_init_net(struct net *net)
 		kfree(knet);
 
 	return ret;
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 }
 
 static void __net_exit kni_exit_net(struct net *net)
 {
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32)
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	struct kni_net *knet = net_generic(net, kni_net_id);
 
 	kfree(knet);
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 }
 
 static struct pernet_operations kni_net_ops = {
 	.init = kni_init_net,
 	.exit = kni_exit_net,
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	.id   = &kni_net_id,
 	.size = sizeof(struct kni_net),
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 };
 
 static int __init
@@ -164,11 +167,11 @@ kni_init(void)
 		return -EINVAL;
 	}
 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	rc = register_pernet_subsys(&kni_net_ops);
 #else
 	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 	if (rc)
 		return -EPERM;
 
@@ -186,11 +189,11 @@ kni_init(void)
 	return 0;
 
 out:
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	unregister_pernet_subsys(&kni_net_ops);
 #else
 	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 	return rc;
 }
 
@@ -198,11 +201,11 @@ static void __exit
 kni_exit(void)
 {
 	misc_deregister(&kni_misc);
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32)
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	unregister_pernet_subsys(&kni_net_ops);
 #else
 	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */
+#endif
 	KNI_PRINT("####### DPDK kni module unloaded  #######\n");
 }
 
@@ -542,6 +545,15 @@ kni_ioctl_create(struct net *net,
 	if (pci)
 		pci_dev_put(pci);
 
+	if (kni->lad_dev)
+		memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+	else
+		/*
+		 * Generate random mac address. eth_random_addr() is the newer
+		 * version of generating mac address in linux kernel.
+		 */
+		random_ether_addr(net_dev->dev_addr);
+
 	ret = register_netdev(net_dev);
 	if (ret) {
 		KNI_ERR("error %i registering device \"%s\"\n",
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
index cfa83398..fc82193a 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -38,6 +38,8 @@
 
 #include <exec-env/rte_kni_common.h>
 #include <kni_fifo.h>
+
+#include "compat.h"
 #include "kni_dev.h"
 
 #define WD_TIMEOUT 5 /*jiffies */
@@ -69,15 +71,6 @@ kni_net_open(struct net_device *dev)
 	struct rte_kni_request req;
 	struct kni_dev *kni = netdev_priv(dev);
 
-	if (kni->lad_dev)
-		memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
-	else
-		/*
-		 * Generate random mac address. eth_random_addr() is the newer
-		 * version of generating mac address in linux kernel.
-		 */
-		random_ether_addr(dev->dev_addr);
-
 	netif_start_queue(dev);
 
 	memset(&req, 0, sizeof(req));
@@ -156,7 +149,8 @@ kni_net_rx_normal(struct kni_dev *kni)
 	/* Transfer received packets to netif */
 	for (i = 0; i < num_rx; i++) {
 		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
-		len = kva->data_len;
+		len = kva->pkt_len;
+
 		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
 				+ kni->mbuf_kva;
 
@@ -165,22 +159,41 @@ kni_net_rx_normal(struct kni_dev *kni)
 			KNI_ERR("Out of mem, dropping pkts\n");
 			/* Update statistics */
 			kni->stats.rx_dropped++;
+			continue;
 		}
-		else {
-			/* Align IP on 16B boundary */
-			skb_reserve(skb, 2);
+
+		/* Align IP on 16B boundary */
+		skb_reserve(skb, 2);
+
+		if (kva->nb_segs == 1) {
 			memcpy(skb_put(skb, len), data_kva, len);
-			skb->dev = dev;
-			skb->protocol = eth_type_trans(skb, dev);
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		} else {
+			int nb_segs;
+			int kva_nb_segs = kva->nb_segs;
 
-			/* Call netif interface */
-			netif_rx_ni(skb);
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				memcpy(skb_put(skb, kva->data_len),
+					data_kva, kva->data_len);
 
-			/* Update statistics */
-			kni->stats.rx_bytes += len;
-			kni->stats.rx_packets++;
+				if (!kva->next)
+					break;
+
+				kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
+				data_kva = kva->buf_addr + kva->data_off
+					- kni->mbuf_va + kni->mbuf_kva;
+			}
 		}
+
+		skb->dev = dev;
+		skb->protocol = eth_type_trans(skb, dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		/* Call netif interface */
+		netif_rx_ni(skb);
+
+		/* Update statistics */
+		kni->stats.rx_bytes += len;
+		kni->stats.rx_packets++;
 	}
 
 	/* Burst enqueue mbufs into free_q */
@@ -317,7 +330,7 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 	/* Copy mbufs to sk buffer and then call tx interface */
 	for (i = 0; i < num; i++) {
 		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
-		len = kva->data_len;
+		len = kva->pkt_len;
 		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
 				kni->mbuf_kva;
 
@@ -338,20 +351,39 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 		if (skb == NULL) {
 			KNI_ERR("Out of mem, dropping pkts\n");
 			kni->stats.rx_dropped++;
+			continue;
 		}
-		else {
-			/* Align IP on 16B boundary */
-			skb_reserve(skb, 2);
+
+		/* Align IP on 16B boundary */
+		skb_reserve(skb, 2);
+
+		if (kva->nb_segs == 1) {
 			memcpy(skb_put(skb, len), data_kva, len);
-			skb->dev = dev;
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		} else {
+			int nb_segs;
+			int kva_nb_segs = kva->nb_segs;
 
-			kni->stats.rx_bytes += len;
-			kni->stats.rx_packets++;
+			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
+				memcpy(skb_put(skb, kva->data_len),
+					data_kva, kva->data_len);
 
-			/* call tx interface */
-			kni_net_tx(skb, dev);
+				if (!kva->next)
+					break;
+
+				kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
+				data_kva = kva->buf_addr + kva->data_off
+					- kni->mbuf_va + kni->mbuf_kva;
+			}
 		}
+
+		skb->dev = dev;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		kni->stats.rx_bytes += len;
+		kni->stats.rx_packets++;
+
+		/* call tx interface */
+		kni_net_tx(skb, dev);
 	}
 
 	/* enqueue all the mbufs from rx_q into free_q */
@@ -396,7 +428,12 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
 	struct rte_kni_mbuf *pkt_kva = NULL;
 	struct rte_kni_mbuf *pkt_va = NULL;
 
-	dev->trans_start = jiffies; /* save the timestamp */
+	/* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+	netif_trans_update(dev);
+#else
+	dev->trans_start = jiffies;
+#endif
 
 	/* Check if the length of skb is less than mbuf size */
 	if (skb->len > kni->mbuf_size)
@@ -604,7 +641,7 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev,
 /*
  * Re-fill the eth header
  */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+#ifdef HAVE_REBUILD_HEADER
 static int
 kni_net_rebuild_header(struct sk_buff *skb)
 {
@@ -634,7 +671,7 @@ static int kni_net_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+#ifdef HAVE_CHANGE_CARRIER_CB
 static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
 {
 	if (new_carrier)
@@ -647,7 +684,7 @@ static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
 
 static const struct header_ops kni_net_header_ops = {
 	.create  = kni_net_header,
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0))
+#ifdef HAVE_REBUILD_HEADER
 	.rebuild = kni_net_rebuild_header,
 #endif /* < 4.1.0  */
 	.cache   = NULL,  /* disable caching */
@@ -664,7 +701,7 @@ static const struct net_device_ops kni_net_netdev_ops = {
 	.ndo_get_stats = kni_net_stats,
 	.ndo_tx_timeout = kni_net_tx_timeout,
 	.ndo_set_mac_address = kni_net_set_mac,
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0))
+#ifdef HAVE_CHANGE_CARRIER_CB
 	.ndo_change_carrier = kni_net_change_carrier,
 #endif
 };
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index e8102846..0bb5dc90 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -41,7 +41,7 @@ CFLAGS += $(WERROR_FLAGS)
 
 EXPORT_MAP := rte_ether_version.map
 
-LIBABIVER := 3
+LIBABIVER := 4
 
 SRCS-y += rte_ethdev.c
 
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index b8c7be90..c3a2c9e4 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -74,7 +74,12 @@ extern "C" {
 #define RTE_ETH_FLOW_IPV6_EX            15
 #define RTE_ETH_FLOW_IPV6_TCP_EX        16
 #define RTE_ETH_FLOW_IPV6_UDP_EX        17
-#define RTE_ETH_FLOW_MAX                18
+#define RTE_ETH_FLOW_PORT               18
+	/**< Consider device port number as a flow differentiator */
+#define RTE_ETH_FLOW_VXLAN              19 /**< VXLAN protocol based flow */
+#define RTE_ETH_FLOW_GENEVE             20 /**< GENEVE protocol based flow */
+#define RTE_ETH_FLOW_NVGRE              21 /**< NVGRE protocol based flow */
+#define RTE_ETH_FLOW_MAX                22
 
 /**
  * Feature filter types
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index a31018e8..eac260f1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -77,6 +77,12 @@ static uint8_t nb_ports;
 /* spinlock for eth device callbacks */
 static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER;
 
+/* spinlock for add/remove rx callbacks */
+static rte_spinlock_t rte_eth_rx_cb_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* spinlock for add/remove tx callbacks */
+static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER;
+
 /* store statistics names and its offset in stats structure  */
 struct rte_eth_xstats_name_off {
 	char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -369,8 +375,7 @@ rte_eth_dev_is_valid_port(uint8_t port_id)
 int
 rte_eth_dev_socket_id(uint8_t port_id)
 {
-	if (!rte_eth_dev_is_valid_port(port_id))
-		return -1;
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
 	return rte_eth_devices[port_id].data->numa_node;
 }
 
@@ -383,8 +388,7 @@ rte_eth_dev_count(void)
 static enum rte_eth_dev_type
 rte_eth_dev_get_device_type(uint8_t port_id)
 {
-	if (!rte_eth_dev_is_valid_port(port_id))
-		return RTE_ETH_DEV_UNKNOWN;
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN);
 	return rte_eth_devices[port_id].dev_type;
 }
 
@@ -402,7 +406,7 @@ rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
 	return 0;
 }
 
-static int
+int
 rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
 {
 	char *tmp;
@@ -421,7 +425,7 @@ rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
 	return 0;
 }
 
-static int
+int
 rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
 {
 	int i;
@@ -479,10 +483,7 @@ rte_eth_dev_is_detachable(uint8_t port_id)
 {
 	uint32_t dev_flags;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-		return -EINVAL;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
 	switch (rte_eth_devices[port_id].data->kdrv) {
 	case RTE_KDRV_IGB_UIO:
@@ -1507,9 +1508,85 @@ rte_eth_stats_reset(uint8_t port_id)
 	dev->data->rx_mbuf_alloc_failed = 0;
 }
 
+static int
+get_xstats_count(uint8_t port_id)
+{
+	struct rte_eth_dev *dev;
+	int count;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	dev = &rte_eth_devices[port_id];
+	if (dev->dev_ops->xstats_get_names != NULL) {
+		count = (*dev->dev_ops->xstats_get_names)(dev, NULL, 0);
+		if (count < 0)
+			return count;
+	} else
+		count = 0;
+	count += RTE_NB_STATS;
+	count += dev->data->nb_rx_queues * RTE_NB_RXQ_STATS;
+	count += dev->data->nb_tx_queues * RTE_NB_TXQ_STATS;
+	return count;
+}
+
+int
+rte_eth_xstats_get_names(uint8_t port_id,
+	struct rte_eth_xstat_name *xstats_names,
+	unsigned size)
+{
+	struct rte_eth_dev *dev;
+	int cnt_used_entries;
+	int cnt_expected_entries;
+	uint32_t idx, id_queue;
+
+	cnt_expected_entries = get_xstats_count(port_id);
+	if (xstats_names == NULL || cnt_expected_entries < 0 ||
+			(int)size < cnt_expected_entries)
+		return cnt_expected_entries;
+
+	/* port_id checked in get_xstats_count() */
+	dev = &rte_eth_devices[port_id];
+	if (dev->dev_ops->xstats_get_names != NULL) {
+		cnt_used_entries = (*dev->dev_ops->xstats_get_names)(
+			dev, xstats_names, size);
+		if (cnt_used_entries < 0)
+			return cnt_used_entries;
+	} else
+		/* Driver itself does not support extended stats, but
+		 * still have basic stats.
+		 */
+		cnt_used_entries = 0;
+
+	for (idx = 0; idx < RTE_NB_STATS; idx++) {
+		snprintf(xstats_names[cnt_used_entries].name,
+			sizeof(xstats_names[0].name),
+			"%s", rte_stats_strings[idx].name);
+		cnt_used_entries++;
+	}
+	for (id_queue = 0; id_queue < dev->data->nb_rx_queues; id_queue++) {
+		for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) {
+			snprintf(xstats_names[cnt_used_entries].name,
+				sizeof(xstats_names[0].name),
+				"rx_q%u%s",
+				id_queue, rte_rxq_stats_strings[idx].name);
+			cnt_used_entries++;
+		}
+
+	}
+	for (id_queue = 0; id_queue < dev->data->nb_tx_queues; id_queue++) {
+		for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) {
+			snprintf(xstats_names[cnt_used_entries].name,
+				sizeof(xstats_names[0].name),
+				"tx_q%u%s",
+				id_queue, rte_txq_stats_strings[idx].name);
+			cnt_used_entries++;
+		}
+	}
+	return cnt_used_entries;
+}
+
 /* retrieve ethdev extended statistics */
 int
-rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
+rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
 	unsigned n)
 {
 	struct rte_eth_stats eth_stats;
@@ -1551,8 +1628,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
 		stats_ptr = RTE_PTR_ADD(&eth_stats,
 					rte_stats_strings[i].offset);
 		val = *stats_ptr;
-		snprintf(xstats[count].name, sizeof(xstats[count].name),
-			"%s", rte_stats_strings[i].name);
+		xstats[count].id = count + xcount;
 		xstats[count++].value = val;
 	}
 
@@ -1563,9 +1639,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
 					rte_rxq_stats_strings[i].offset +
 					q * sizeof(uint64_t));
 			val = *stats_ptr;
-			snprintf(xstats[count].name, sizeof(xstats[count].name),
-				"rx_q%u_%s", q,
-				rte_rxq_stats_strings[i].name);
+			xstats[count].id = count + xcount;
 			xstats[count++].value = val;
 		}
 	}
@@ -1577,9 +1651,7 @@ rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
 					rte_txq_stats_strings[i].offset +
 					q * sizeof(uint64_t));
 			val = *stats_ptr;
-			snprintf(xstats[count].name, sizeof(xstats[count].name),
-				"tx_q%u_%s", q,
-				rte_txq_stats_strings[i].name);
+			xstats[count].id = count + xcount;
 			xstats[count++].value = val;
 		}
 	}
@@ -1639,7 +1711,6 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
 			STAT_QMAP_RX);
 }
 
-
 void
 rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 {
@@ -1661,6 +1732,8 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
 	dev_info->pci_dev = dev->pci_dev;
 	dev_info->driver_name = dev->data->drv_name;
+	dev_info->nb_rx_queues = dev->data->nb_rx_queues;
+	dev_info->nb_tx_queues = dev->data->nb_tx_queues;
 }
 
 int
@@ -1994,10 +2067,7 @@ rte_eth_dev_rss_reta_query(uint8_t port_id,
 	struct rte_eth_dev *dev;
 	int ret;
 
-	if (port_id >= nb_ports) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	/* Check mask bits */
 	ret = rte_eth_check_reta_mask(reta_conf, reta_size);
@@ -2641,10 +2711,7 @@ rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data)
 	uint16_t qid;
 	int rc;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	dev = &rte_eth_devices[port_id];
 	intr_handle = &dev->pci_dev->intr_handle;
@@ -2699,10 +2766,7 @@ rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id,
 	struct rte_intr_handle *intr_handle;
 	int rc;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	dev = &rte_eth_devices[port_id];
 	if (queue_id >= dev->data->nb_rx_queues) {
@@ -2734,10 +2798,7 @@ rte_eth_dev_rx_intr_enable(uint8_t port_id,
 {
 	struct rte_eth_dev *dev;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	dev = &rte_eth_devices[port_id];
 
@@ -2751,10 +2812,7 @@ rte_eth_dev_rx_intr_disable(uint8_t port_id,
 {
 	struct rte_eth_dev *dev;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	dev = &rte_eth_devices[port_id];
 
@@ -2925,7 +2983,6 @@ rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
 		rte_errno = EINVAL;
 		return NULL;
 	}
-
 	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
 
 	if (cb == NULL) {
@@ -2936,6 +2993,7 @@ rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
 	cb->fn.rx = fn;
 	cb->param = user_param;
 
+	rte_spinlock_lock(&rte_eth_rx_cb_lock);
 	/* Add the callbacks in fifo order. */
 	struct rte_eth_rxtx_callback *tail =
 		rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
@@ -2948,6 +3006,42 @@ rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
 			tail = tail->next;
 		tail->next = cb;
 	}
+	rte_spinlock_unlock(&rte_eth_rx_cb_lock);
+
+	return cb;
+}
+
+void *
+rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rx_callback_fn fn, void *user_param)
+{
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	rte_errno = ENOTSUP;
+	return NULL;
+#endif
+	/* check input parameters */
+	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
+		queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
+
+	if (cb == NULL) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	cb->fn.rx = fn;
+	cb->param = user_param;
+
+	rte_spinlock_lock(&rte_eth_rx_cb_lock);
+	/* Add the callbacks at fisrt position*/
+	cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id];
+	rte_smp_wmb();
+	rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb;
+	rte_spinlock_unlock(&rte_eth_rx_cb_lock);
 
 	return cb;
 }
@@ -2977,6 +3071,7 @@ rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
 	cb->fn.tx = fn;
 	cb->param = user_param;
 
+	rte_spinlock_lock(&rte_eth_tx_cb_lock);
 	/* Add the callbacks in fifo order. */
 	struct rte_eth_rxtx_callback *tail =
 		rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id];
@@ -2989,6 +3084,7 @@ rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id,
 			tail = tail->next;
 		tail->next = cb;
 	}
+	rte_spinlock_unlock(&rte_eth_tx_cb_lock);
 
 	return cb;
 }
@@ -3001,35 +3097,30 @@ rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id,
 	return -ENOTSUP;
 #endif
 	/* Check input parameters. */
-	if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL ||
-		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	if (user_cb == NULL ||
+			queue_id >= rte_eth_devices[port_id].data->nb_rx_queues)
 		return -EINVAL;
-	}
 
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
-	struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id];
-	struct rte_eth_rxtx_callback *prev_cb;
-
-	/* Reset head pointer and remove user cb if first in the list. */
-	if (cb == user_cb) {
-		dev->post_rx_burst_cbs[queue_id] = user_cb->next;
-		return 0;
-	}
-
-	/* Remove the user cb from the callback list. */
-	do {
-		prev_cb = cb;
-		cb = cb->next;
-
+	struct rte_eth_rxtx_callback *cb;
+	struct rte_eth_rxtx_callback **prev_cb;
+	int ret = -EINVAL;
+
+	rte_spinlock_lock(&rte_eth_rx_cb_lock);
+	prev_cb = &dev->post_rx_burst_cbs[queue_id];
+	for (; *prev_cb != NULL; prev_cb = &cb->next) {
+		cb = *prev_cb;
 		if (cb == user_cb) {
-			prev_cb->next = user_cb->next;
-			return 0;
+			/* Remove the user cb from the callback list. */
+			*prev_cb = cb->next;
+			ret = 0;
+			break;
 		}
+	}
+	rte_spinlock_unlock(&rte_eth_rx_cb_lock);
 
-	} while (cb != NULL);
-
-	/* Callback wasn't found. */
-	return -EINVAL;
+	return ret;
 }
 
 int
@@ -3040,35 +3131,30 @@ rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id,
 	return -ENOTSUP;
 #endif
 	/* Check input parameters. */
-	if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL ||
-		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	if (user_cb == NULL ||
+			queue_id >= rte_eth_devices[port_id].data->nb_tx_queues)
 		return -EINVAL;
-	}
 
 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
-	struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id];
-	struct rte_eth_rxtx_callback *prev_cb;
-
-	/* Reset head pointer and remove user cb if first in the list. */
-	if (cb == user_cb) {
-		dev->pre_tx_burst_cbs[queue_id] = user_cb->next;
-		return 0;
-	}
-
-	/* Remove the user cb from the callback list. */
-	do {
-		prev_cb = cb;
-		cb = cb->next;
-
+	int ret = -EINVAL;
+	struct rte_eth_rxtx_callback *cb;
+	struct rte_eth_rxtx_callback **prev_cb;
+
+	rte_spinlock_lock(&rte_eth_tx_cb_lock);
+	prev_cb = &dev->pre_tx_burst_cbs[queue_id];
+	for (; *prev_cb != NULL; prev_cb = &cb->next) {
+		cb = *prev_cb;
 		if (cb == user_cb) {
-			prev_cb->next = user_cb->next;
-			return 0;
+			/* Remove the user cb from the callback list. */
+			*prev_cb = cb->next;
+			ret = 0;
+			break;
 		}
+	}
+	rte_spinlock_unlock(&rte_eth_tx_cb_lock);
 
-	} while (cb != NULL);
-
-	/* Callback wasn't found. */
-	return -EINVAL;
+	return ret;
 }
 
 int
@@ -3284,10 +3370,7 @@ rte_eth_dev_get_dcb_info(uint8_t port_id,
 {
 	struct rte_eth_dev *dev;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
-		return -ENODEV;
-	}
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 
 	dev = &rte_eth_devices[port_id];
 	memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info));
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 022733ec..0f173231 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -102,7 +102,7 @@
  * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or
  * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the
  * device and then do the reconfiguration before calling rte_eth_dev_start()
- * again. The tramsit and receive functions should not be invoked when the
+ * again. The transmit and receive functions should not be invoked when the
  * device is stopped.
  *
  * Please note that some configuration is not stored between calls to
@@ -200,27 +200,9 @@ struct rte_eth_stats {
 	/**< Total of RX packets dropped by the HW,
 	 * because there are no available mbufs (i.e. RX queues are full).
 	 */
-	uint64_t ibadcrc __rte_deprecated;
-	/**< Deprecated; Total of RX packets with CRC error. */
-	uint64_t ibadlen __rte_deprecated;
-	/**< Deprecated; Total of RX packets with bad length. */
 	uint64_t ierrors;   /**< Total number of erroneous received packets. */
 	uint64_t oerrors;   /**< Total number of failed transmitted packets. */
-	uint64_t imcasts;
-	/**< Deprecated; Total number of multicast received packets. */
 	uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */
-	uint64_t fdirmatch __rte_deprecated;
-	/**< Deprecated; Total number of RX packets matching a filter. */
-	uint64_t fdirmiss __rte_deprecated;
-	/**< Deprecated; Total number of RX packets not matching any filter. */
-	uint64_t tx_pause_xon __rte_deprecated;
-	 /**< Deprecated; Total nb. of XON pause frame sent. */
-	uint64_t rx_pause_xon __rte_deprecated;
-	/**< Deprecated; Total nb. of XON pause frame received. */
-	uint64_t tx_pause_xoff __rte_deprecated;
-	/**< Deprecated; Total nb. of XOFF pause frame sent. */
-	uint64_t rx_pause_xoff __rte_deprecated;
-	/**< Deprecated; Total nb. of XOFF pause frame received. */
 	uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
 	/**< Total number of queue RX packets. */
 	uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
@@ -231,14 +213,6 @@ struct rte_eth_stats {
 	/**< Total number of successfully transmitted queue bytes. */
 	uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
 	/**< Total number of queue packets received that are dropped. */
-	uint64_t ilbpackets;
-	/**< Total number of good packets received from loopback,VF Only */
-	uint64_t olbpackets;
-	/**< Total number of good packets transmitted to loopback,VF Only */
-	uint64_t ilbbytes;
-	/**< Total number of good bytes received from loopback,VF Only */
-	uint64_t olbbytes;
-	/**< Total number of good bytes transmitted to loopback,VF Only */
 };
 
 /**
@@ -389,8 +363,8 @@ struct rte_eth_rxmode {
  */
 enum rte_vlan_type {
 	ETH_VLAN_TYPE_UNKNOWN = 0,
-	ETH_VLAN_TYPE_INNER, /**< Single VLAN, or inner VLAN. */
-	ETH_VLAN_TYPE_OUTER, /**< Outer VLAN. */
+	ETH_VLAN_TYPE_INNER, /**< Inner VLAN. */
+	ETH_VLAN_TYPE_OUTER, /**< Single VLAN, or outer VLAN. */
 	ETH_VLAN_TYPE_MAX,
 };
 
@@ -439,6 +413,10 @@ struct rte_eth_rss_conf {
 #define ETH_RSS_IPV6_EX            (1ULL << RTE_ETH_FLOW_IPV6_EX)
 #define ETH_RSS_IPV6_TCP_EX        (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX)
 #define ETH_RSS_IPV6_UDP_EX        (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX)
+#define ETH_RSS_PORT               (1ULL << RTE_ETH_FLOW_PORT)
+#define ETH_RSS_VXLAN              (1ULL << RTE_ETH_FLOW_VXLAN)
+#define ETH_RSS_GENEVE             (1ULL << RTE_ETH_FLOW_GENEVE)
+#define ETH_RSS_NVGRE              (1ULL << RTE_ETH_FLOW_NVGRE)
 
 #define ETH_RSS_IP ( \
 	ETH_RSS_IPV4 | \
@@ -463,6 +441,12 @@ struct rte_eth_rss_conf {
 	ETH_RSS_NONFRAG_IPV4_SCTP | \
 	ETH_RSS_NONFRAG_IPV6_SCTP)
 
+#define ETH_RSS_TUNNEL ( \
+	ETH_RSS_VXLAN  | \
+	ETH_RSS_GENEVE | \
+	ETH_RSS_NVGRE)
+
+
 /**< Mask of valid RSS hash protocols */
 #define ETH_RSS_PROTO_MASK ( \
 	ETH_RSS_IPV4 | \
@@ -480,7 +464,11 @@ struct rte_eth_rss_conf {
 	ETH_RSS_L2_PAYLOAD | \
 	ETH_RSS_IPV6_EX | \
 	ETH_RSS_IPV6_TCP_EX | \
-	ETH_RSS_IPV6_UDP_EX)
+	ETH_RSS_IPV6_UDP_EX | \
+	ETH_RSS_PORT  | \
+	ETH_RSS_VXLAN | \
+	ETH_RSS_GENEVE | \
+	ETH_RSS_NVGRE)
 
 /*
  * Definitions used for redirection table entry size.
@@ -489,6 +477,7 @@ struct rte_eth_rss_conf {
  */
 #define ETH_RSS_RETA_SIZE_64  64
 #define ETH_RSS_RETA_SIZE_128 128
+#define ETH_RSS_RETA_SIZE_256 256
 #define ETH_RSS_RETA_SIZE_512 512
 #define RTE_RETA_GROUP_SIZE   64
 
@@ -908,6 +897,9 @@ struct rte_eth_dev_info {
 	struct rte_eth_desc_lim rx_desc_lim;  /**< RX descriptors limits */
 	struct rte_eth_desc_lim tx_desc_lim;  /**< TX descriptors limits */
 	uint32_t speed_capa;  /**< Supported speeds bitmap (ETH_LINK_SPEED_). */
+	/** Configured number of rx/tx queues */
+	uint16_t nb_rx_queues; /**< Number of RX queues. */
+	uint16_t nb_tx_queues; /**< Number of TX queues. */
 };
 
 /**
@@ -940,11 +932,21 @@ struct rte_eth_txq_info {
  * statistics that are not provided in the generic rte_eth_stats
  * structure.
  */
-struct rte_eth_xstats {
-	char name[RTE_ETH_XSTATS_NAME_SIZE];
+struct rte_eth_xstat {
+	uint64_t id;
 	uint64_t value;
 };
 
+/**
+ * A name-key lookup element for extended statistics.
+ *
+ * This structure is used to map between names and ID numbers
+ * for extended ethernet statistics.
+ */
+struct rte_eth_xstat_name {
+	char name[RTE_ETH_XSTATS_NAME_SIZE];
+};
+
 #define ETH_DCB_NUM_TCS    8
 #define ETH_MAX_VMDQ_POOL  64
 
@@ -1074,12 +1076,16 @@ typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev);
 /**< @internal Reset global I/O statistics of an Ethernet device to 0. */
 
 typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev,
-	struct rte_eth_xstats *stats, unsigned n);
+	struct rte_eth_xstat *stats, unsigned n);
 /**< @internal Get extended stats of an Ethernet device. */
 
 typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev);
 /**< @internal Reset extended stats of an Ethernet device. */
 
+typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev *dev,
+	struct rte_eth_xstat_name *xstats_names, unsigned size);
+/**< @internal Get names of extended stats of an Ethernet device. */
+
 typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev,
 					     uint16_t queue_id,
 					     uint8_t stat_idx,
@@ -1150,7 +1156,7 @@ typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev,
 
 typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev,
 			       enum rte_vlan_type type, uint16_t tpid);
-/**< @internal set the outer VLAN-TPID by an Ethernet device. */
+/**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */
 
 typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask);
 /**< @internal set VLAN offload function by an Ethernet device. */
@@ -1427,6 +1433,8 @@ struct eth_dev_ops {
 	eth_stats_reset_t          stats_reset;   /**< Reset generic device statistics. */
 	eth_xstats_get_t           xstats_get;    /**< Get extended device statistics. */
 	eth_xstats_reset_t         xstats_reset;  /**< Reset extended device statistics. */
+	eth_xstats_get_names_t     xstats_get_names;
+	/**< Get names of extended statistics. */
 	eth_queue_stats_mapping_set_t queue_stats_mapping_set;
 	/**< Configure per queue stat counter mapping. */
 	eth_dev_infos_get_t        dev_infos_get; /**< Get device info. */
@@ -1434,7 +1442,7 @@ struct eth_dev_ops {
 	/**< Get packet types supported and identified by device*/
 	mtu_set_t                  mtu_set; /**< Set MTU. */
 	vlan_filter_set_t          vlan_filter_set;  /**< Filter VLAN Setup. */
-	vlan_tpid_set_t            vlan_tpid_set;      /**< Outer VLAN TPID Setup. */
+	vlan_tpid_set_t            vlan_tpid_set;      /**< Outer/Inner VLAN TPID Setup. */
 	vlan_strip_queue_set_t     vlan_strip_queue_set; /**< VLAN Stripping on queue. */
 	vlan_offload_set_t         vlan_offload_set; /**< Set VLAN Offload. */
 	vlan_pvid_set_t            vlan_pvid_set; /**< Set port based TX VLAN insertion */
@@ -1641,7 +1649,7 @@ struct rte_eth_dev {
 	struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
 	uint8_t attached; /**< Flag indicating the port is attached */
 	enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */
-};
+} __rte_cache_aligned;
 
 struct rte_eth_dev_sriov {
 	uint8_t active;               /**< SRIOV is active with 16, 32 or 64 pools */
@@ -2015,7 +2023,7 @@ int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
 		uint16_t nb_tx_desc, unsigned int socket_id,
 		const struct rte_eth_txconf *tx_conf);
 
-/*
+/**
  * Return the NUMA socket to which an Ethernet device is connected
  *
  * @param port_id
@@ -2027,7 +2035,7 @@ int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
  */
 int rte_eth_dev_socket_id(uint8_t port_id);
 
-/*
+/**
  * Check if port_id of device is attached
  *
  * @param port_id
@@ -2038,7 +2046,7 @@ int rte_eth_dev_socket_id(uint8_t port_id);
  */
 int rte_eth_dev_is_valid_port(uint8_t port_id);
 
-/*
+/**
  * Allocate mbuf from mempool, setup the DMA physical address
  * and then start RX for specified queue of a port. It is used
  * when rx_deferred_start flag of the specified queue is true.
@@ -2056,7 +2064,7 @@ int rte_eth_dev_is_valid_port(uint8_t port_id);
  */
 int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id);
 
-/*
+/**
  * Stop specified RX queue of a port
  *
  * @param port_id
@@ -2072,7 +2080,7 @@ int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id);
  */
 int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id);
 
-/*
+/**
  * Start TX for specified queue of a port. It is used when tx_deferred_start
  * flag of the specified queue is true.
  *
@@ -2089,7 +2097,7 @@ int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id);
  */
 int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id);
 
-/*
+/**
  * Stop specified TX queue of a port
  *
  * @param port_id
@@ -2279,13 +2287,36 @@ int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats);
 void rte_eth_stats_reset(uint8_t port_id);
 
 /**
+ * Retrieve names of extended statistics of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param xstats_names
+ *  Block of memory to insert names into. Must be at least size in capacity.
+ *  If set to NULL, function returns required capacity.
+ * @param size
+ *  Capacity of xstats_names (number of names).
+ * @return
+ *   - positive value lower or equal to size: success. The return value
+ *     is the number of entries filled in the stats table.
+ *   - positive value higher than size: error, the given statistics table
+ *     is too small. The return value corresponds to the size that should
+ *     be given to succeed. The entries in the table are not valid and
+ *     shall not be used by the caller.
+ *   - negative value on error (invalid port id)
+ */
+int rte_eth_xstats_get_names(uint8_t port_id,
+		struct rte_eth_xstat_name *xstats_names,
+		unsigned size);
+
+/**
  * Retrieve extended statistics of an Ethernet device.
  *
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param xstats
- *   A pointer to a table of structure of type *rte_eth_xstats*
- *   to be filled with device statistics names and values.
+ *   A pointer to a table of structure of type *rte_eth_xstat*
+ *   to be filled with device statistics ids and values.
  *   This parameter can be set to NULL if n is 0.
  * @param n
  *   The size of the stats table, which should be large enough to store
@@ -2299,7 +2330,7 @@ void rte_eth_stats_reset(uint8_t port_id);
  *     shall not be used by the caller.
  *   - negative value on error (invalid port id)
  */
-int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats,
+int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstat *xstats,
 		unsigned n);
 
 /**
@@ -2376,6 +2407,21 @@ void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
 /**
  * Retrieve the supported packet types of an Ethernet device.
  *
+ * When a packet type is announced as supported, it *must* be recognized by
+ * the PMD. For instance, if RTE_PTYPE_L2_ETHER, RTE_PTYPE_L2_ETHER_VLAN
+ * and RTE_PTYPE_L3_IPV4 are announced, the PMD must return the following
+ * packet types for these packets:
+ * - Ether/IPv4              -> RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4
+ * - Ether/Vlan/IPv4         -> RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4
+ * - Ether/[anything else]   -> RTE_PTYPE_L2_ETHER
+ * - Ether/Vlan/[anything else] -> RTE_PTYPE_L2_ETHER_VLAN
+ *
+ * When a packet is received by a PMD, the most precise type must be
+ * returned among the ones supported. However a PMD is allowed to set
+ * packet type that is not in the supported list, at the condition that it
+ * is more precise. Therefore, a PMD announcing no supported packet types
+ * can still set a matching packet type in a received packet.
+ *
  * @note
  *   Better to invoke this API after the device is already started or rx burst
  *   function is decided, to obtain correct supported ptypes.
@@ -2424,6 +2470,7 @@ int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
  *   - (-ENOTSUP) if operation is not supported.
  *   - (-ENODEV) if *port_id* invalid.
  *   - (-EINVAL) if *mtu* invalid.
+ *   - (-EBUSY) if operation is not allowed when the port is running
  */
 int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu);
 
@@ -2709,7 +2756,8 @@ rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset)
  * on the output queue *queue_id* of the Ethernet device designated by its
  * *port_id*.
  * The *nb_pkts* parameter is the number of packets to send which are
- * supplied in the *tx_pkts* array of *rte_mbuf* structures.
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
  * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets,
  * up to the number of transmit descriptors available in the TX ring of the
  * transmit queue.
@@ -3851,6 +3899,34 @@ int rte_eth_dev_get_dcb_info(uint8_t port_id,
 void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id,
 		rte_rx_callback_fn fn, void *user_param);
 
+/*
+* Add a callback that must be called first on packet RX on a given port
+* and queue.
+*
+* This API configures a first function to be called for each burst of
+* packets received on a given NIC port queue. The return value is a pointer
+* that can be used to later remove the callback using
+* rte_eth_remove_rx_callback().
+*
+* Multiple functions are called in the order that they are added.
+*
+* @param port_id
+*   The port identifier of the Ethernet device.
+* @param queue_id
+*   The queue on the Ethernet device on which the callback is to be added.
+* @param fn
+*   The callback function
+* @param user_param
+*   A generic pointer parameter which will be passed to each invocation of the
+*   callback function on this port and queue.
+*
+* @return
+*   NULL on error.
+*   On success, a pointer value which can later be used to remove the callback.
+*/
+void *rte_eth_add_first_rx_callback(uint8_t port_id, uint16_t queue_id,
+		rte_rx_callback_fn fn, void *user_param);
+
 /**
  * Add a callback to be called on packet TX on a given port and queue.
  *
@@ -3984,7 +4060,7 @@ int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id,
 int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
 
-/*
+/**
  * Retrieve number of available registers for access
  *
  * @param port_id
@@ -4279,6 +4355,35 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
 				  uint32_t mask,
 				  uint8_t en);
 
+/**
+* Get the port id from pci adrress or device name
+* Ex: 0000:2:00.0 or vdev name eth_pcap0
+*
+* @param name
+*  pci address or name of the device
+* @param port_id
+*   pointer to port identifier of the device
+* @return
+*   - (0) if successful.
+*   - (-ENODEV or -EINVAL) on failure.
+*/
+int
+rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
+
+/**
+* Get the device name from port id
+*
+* @param port_id
+*   pointer to port identifier of the device
+* @param name
+*  pci address or name of the device
+* @return
+*   - (0) if successful.
+*   - (-EINVAL) on failure.
+*/
+int
+rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 214ecc73..e1ccebe0 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -66,7 +66,6 @@ DPDK_2.2 {
 	rte_eth_dev_set_vf_rxmode;
 	rte_eth_dev_set_vf_tx;
 	rte_eth_dev_set_vf_vlan_filter;
-	rte_eth_dev_set_vlan_ether_type;
 	rte_eth_dev_set_vlan_offload;
 	rte_eth_dev_set_vlan_pvid;
 	rte_eth_dev_set_vlan_strip_on_queue;
@@ -132,3 +131,12 @@ DPDK_16.04 {
 	rte_eth_tx_buffer_set_err_callback;
 
 } DPDK_2.2;
+
+DPDK_16.07 {
+	global:
+
+	rte_eth_add_first_rx_callback;
+	rte_eth_dev_get_name_by_port;
+	rte_eth_dev_get_port_by_name;
+	rte_eth_xstats_get_names;
+} DPDK_16.04;
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 7b7d1f85..e3cc3a7c 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -59,12 +59,10 @@
 #include <rte_compat.h>
 
 #include "rte_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cmp_x86.h"
-#endif
+#include "rte_cuckoo_hash.h"
 
-#if defined(RTE_ARCH_ARM64)
-#include "rte_cmp_arm64.h"
+#if defined(RTE_ARCH_X86)
+#include "rte_cuckoo_hash_x86.h"
 #endif
 
 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -74,153 +72,6 @@ static struct rte_tailq_elem rte_hash_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_hash_tailq)
 
-/* Macro to enable/disable run-time checking of function parameters */
-#if defined(RTE_LIBRTE_HASH_DEBUG)
-#define RETURN_IF_TRUE(cond, retval) do { \
-	if (cond) \
-		return retval; \
-} while (0)
-#else
-#define RETURN_IF_TRUE(cond, retval)
-#endif
-
-/* Hash function used if none is specified */
-#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
-#include <rte_hash_crc.h>
-#define DEFAULT_HASH_FUNC       rte_hash_crc
-#else
-#include <rte_jhash.h>
-#define DEFAULT_HASH_FUNC       rte_jhash
-#endif
-
-/** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES		4
-
-#define NULL_SIGNATURE			0
-
-#define KEY_ALIGNMENT			16
-
-#define LCORE_CACHE_SIZE		8
-
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_16_BYTES,
-	KEY_32_BYTES,
-	KEY_48_BYTES,
-	KEY_64_BYTES,
-	KEY_80_BYTES,
-	KEY_96_BYTES,
-	KEY_112_BYTES,
-	KEY_128_BYTES,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	rte_hash_k16_cmp_eq,
-	rte_hash_k32_cmp_eq,
-	rte_hash_k48_cmp_eq,
-	rte_hash_k64_cmp_eq,
-	rte_hash_k80_cmp_eq,
-	rte_hash_k96_cmp_eq,
-	rte_hash_k112_cmp_eq,
-	rte_hash_k128_cmp_eq,
-	memcmp
-};
-#else
-/*
- * All different options to select a key compare function,
- * based on the key size and custom function.
- */
-enum cmp_jump_table_case {
-	KEY_CUSTOM = 0,
-	KEY_OTHER_BYTES,
-	NUM_KEY_CMP_CASES,
-};
-
-/*
- * Table storing all different key compare functions
- * (multi-process supported)
- */
-const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
-	NULL,
-	memcmp
-};
-
-#endif
-
-struct lcore_cache {
-	unsigned len; /**< Cache len */
-	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
-} __rte_cache_aligned;
-
-/** A hash table structure. */
-struct rte_hash {
-	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
-	uint32_t entries;               /**< Total table entries. */
-	uint32_t num_buckets;           /**< Number of buckets in table. */
-	uint32_t key_len;               /**< Length of hash key. */
-	rte_hash_function hash_func;    /**< Function used to calculate hash. */
-	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
-	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
-	/**< Custom function used to compare keys. */
-	enum cmp_jump_table_case cmp_jump_table_idx;
-	/**< Indicates which compare function to use. */
-	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
-						from hash signature. */
-	uint32_t key_entry_size;         /**< Size of each key entry. */
-
-	struct rte_ring *free_slots;    /**< Ring that stores all indexes
-						of the free slots in the key table */
-	void *key_store;                /**< Table storing all keys and data */
-	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
-							hash values and key indexes
-							to the key table*/
-	uint8_t hw_trans_mem_support;	/**< Hardware transactional
-							memory support */
-	struct lcore_cache *local_free_slots;
-	/**< Local cache per lcore, storing some indexes of the free slots */
-} __rte_cache_aligned;
-
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
-	union {
-		struct {
-			hash_sig_t current;
-			hash_sig_t alt;
-		};
-		uint64_t sig;
-	};
-};
-
-/* Structure that stores key-value pair */
-struct rte_hash_key {
-	union {
-		uintptr_t idata;
-		void *pdata;
-	};
-	/* Variable key size */
-	char key[0];
-} __attribute__((aligned(KEY_ALIGNMENT)));
-
-/** Bucket structure */
-struct rte_hash_bucket {
-	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
-	/* Includes dummy key index that always contains index 0 */
-	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
-	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
-} __rte_cache_aligned;
-
 struct rte_hash *
 rte_hash_find_existing(const char *name)
 {
@@ -372,7 +223,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
 
 /*
  * If x86 architecture is used, select appropriate compare function,
- * which may use x86 instrinsics, otherwise use memcmp
+ * which may use x86 intrinsics, otherwise use memcmp
  */
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 	/* Select function to compare keys */
@@ -431,7 +282,23 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	h->free_slots = r;
 	h->hw_trans_mem_support = hw_trans_mem_support;
 
-	/* populate the free slots ring. Entry zero is reserved for key misses */
+	/* Turn on multi-writer only with explicit flat from user and TM
+	 * support.
+	 */
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
+		if (h->hw_trans_mem_support) {
+			h->add_key = ADD_KEY_MULTIWRITER_TM;
+		} else {
+			h->add_key = ADD_KEY_MULTIWRITER;
+			h->multiwriter_lock = rte_malloc(NULL,
+							sizeof(rte_spinlock_t),
+							LCORE_CACHE_SIZE);
+			rte_spinlock_init(h->multiwriter_lock);
+		}
+	} else
+		h->add_key = ADD_KEY_SINGLEWRITER;
+
+	/* Populate free slots ring. Entry zero is reserved for key misses. */
 	for (i = 1; i < params->entries + 1; i++)
 		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
 
@@ -482,6 +349,8 @@ rte_hash_free(struct rte_hash *h)
 	if (h->hw_trans_mem_support)
 		rte_free(h->local_free_slots);
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_free(h->multiwriter_lock);
 	rte_ring_free(h->free_slots);
 	rte_free(h->key_store);
 	rte_free(h->buckets);
@@ -632,6 +501,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	unsigned lcore_id;
 	struct lcore_cache *cached_free_slots = NULL;
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_spinlock_lock(h->multiwriter_lock);
+
 	prim_bucket_idx = sig & h->bucket_bitmask;
 	prim_bkt = &h->buckets[prim_bucket_idx];
 	rte_prefetch0(prim_bkt);
@@ -712,35 +584,67 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 	rte_memcpy(new_k->key, key, h->key_len);
 	new_k->pdata = data;
 
-	/* Insert new entry is there is room in the primary bucket */
-	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		/* Check if slot is available */
-		if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-			prim_bkt->signatures[i].current = sig;
-			prim_bkt->signatures[i].alt = alt_hash;
-			prim_bkt->key_idx[i] = new_idx;
+#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
+	if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
+		ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
+				sig, alt_hash, new_idx);
+		if (ret >= 0)
+			return new_idx - 1;
+
+		/* Primary bucket full, need to make space for new entry */
+		ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
+							alt_hash, new_idx);
+
+		if (ret >= 0)
+			return new_idx - 1;
+
+		/* Also search secondary bucket to get better occupancy */
+		ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
+							alt_hash, new_idx);
+
+		if (ret >= 0)
+			return new_idx - 1;
+	} else {
+#endif
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			/* Check if slot is available */
+			if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
+				prim_bkt->signatures[i].current = sig;
+				prim_bkt->signatures[i].alt = alt_hash;
+				prim_bkt->key_idx[i] = new_idx;
+				break;
+			}
+		}
+
+		if (i != RTE_HASH_BUCKET_ENTRIES) {
+			if (h->add_key == ADD_KEY_MULTIWRITER)
+				rte_spinlock_unlock(h->multiwriter_lock);
 			return new_idx - 1;
 		}
-	}
 
-	/* Primary bucket is full, so we need to make space for new entry */
-	ret = make_space_bucket(h, prim_bkt);
-	/*
-	 * After recursive function.
-	 * Insert the new entry in the position of the pushed entry
-	 * if successful or return error and
-	 * store the new slot back in the ring
-	 */
-	if (ret >= 0) {
-		prim_bkt->signatures[ret].current = sig;
-		prim_bkt->signatures[ret].alt = alt_hash;
-		prim_bkt->key_idx[ret] = new_idx;
-		return new_idx - 1;
+		/* Primary bucket full, need to make space for new entry
+		 * After recursive function.
+		 * Insert the new entry in the position of the pushed entry
+		 * if successful or return error and
+		 * store the new slot back in the ring
+		 */
+		ret = make_space_bucket(h, prim_bkt);
+		if (ret >= 0) {
+			prim_bkt->signatures[ret].current = sig;
+			prim_bkt->signatures[ret].alt = alt_hash;
+			prim_bkt->key_idx[ret] = new_idx;
+			if (h->add_key == ADD_KEY_MULTIWRITER)
+				rte_spinlock_unlock(h->multiwriter_lock);
+			return new_idx - 1;
+		}
+#if defined(RTE_ARCH_X86)
 	}
-
+#endif
 	/* Error in addition, store new slot back in the ring and return error */
 	enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
 
+	if (h->add_key == ADD_KEY_MULTIWRITER)
+		rte_spinlock_unlock(h->multiwriter_lock);
 	return ret;
 }
 
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
new file mode 100644
index 00000000..6c76700f
--- /dev/null
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -0,0 +1,219 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash.h
+ * This file hold Cuckoo Hash private data structures to allows include from
+ * platform specific files like rte_cuckoo_hash_x86.h
+ */
+
+#ifndef _RTE_CUCKOO_HASH_H_
+#define _RTE_CUCKOO_HASH_H_
+
+#if defined(RTE_ARCH_X86)
+#include "rte_cmp_x86.h"
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_cmp_arm64.h"
+#endif
+
+/* Macro to enable/disable run-time checking of function parameters */
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define RETURN_IF_TRUE(cond, retval) do { \
+	if (cond) \
+		return retval; \
+} while (0)
+#else
+#define RETURN_IF_TRUE(cond, retval)
+#endif
+
+/* Hash function used if none is specified */
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32)
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC       rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC       rte_jhash
+#endif
+
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_16_BYTES,
+	KEY_32_BYTES,
+	KEY_48_BYTES,
+	KEY_64_BYTES,
+	KEY_80_BYTES,
+	KEY_96_BYTES,
+	KEY_112_BYTES,
+	KEY_128_BYTES,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	rte_hash_k16_cmp_eq,
+	rte_hash_k32_cmp_eq,
+	rte_hash_k48_cmp_eq,
+	rte_hash_k64_cmp_eq,
+	rte_hash_k80_cmp_eq,
+	rte_hash_k96_cmp_eq,
+	rte_hash_k112_cmp_eq,
+	rte_hash_k128_cmp_eq,
+	memcmp
+};
+#else
+/*
+ * All different options to select a key compare function,
+ * based on the key size and custom function.
+ */
+enum cmp_jump_table_case {
+	KEY_CUSTOM = 0,
+	KEY_OTHER_BYTES,
+	NUM_KEY_CMP_CASES,
+};
+
+/*
+ * Table storing all different key compare functions
+ * (multi-process supported)
+ */
+const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
+	NULL,
+	memcmp
+};
+
+#endif
+
+enum add_key_case {
+	ADD_KEY_SINGLEWRITER = 0,
+	ADD_KEY_MULTIWRITER,
+	ADD_KEY_MULTIWRITER_TM,
+};
+
+/** Number of items per bucket. */
+#define RTE_HASH_BUCKET_ENTRIES		4
+
+#define NULL_SIGNATURE			0
+
+#define KEY_ALIGNMENT			16
+
+#define LCORE_CACHE_SIZE		64
+
+#define RTE_HASH_BFS_QUEUE_MAX_LEN       1000
+
+#define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
+
+#define RTE_HASH_TSX_MAX_RETRY  10
+
+struct lcore_cache {
+	unsigned len; /**< Cache len */
+	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
+
+/* Structure storing both primary and secondary hashes */
+struct rte_hash_signatures {
+	union {
+		struct {
+			hash_sig_t current;
+			hash_sig_t alt;
+		};
+		uint64_t sig;
+	};
+};
+
+/* Structure that stores key-value pair */
+struct rte_hash_key {
+	union {
+		uintptr_t idata;
+		void *pdata;
+	};
+	/* Variable key size */
+	char key[0];
+} __attribute__((aligned(KEY_ALIGNMENT)));
+
+/** Bucket structure */
+struct rte_hash_bucket {
+	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
+	/* Includes dummy key index that always contains index 0 */
+	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+} __rte_cache_aligned;
+
+/** A hash table structure. */
+struct rte_hash {
+	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
+	uint32_t entries;               /**< Total table entries. */
+	uint32_t num_buckets;           /**< Number of buckets in table. */
+	uint32_t key_len;               /**< Length of hash key. */
+	rte_hash_function hash_func;    /**< Function used to calculate hash. */
+	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
+	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
+	/**< Custom function used to compare keys. */
+	enum cmp_jump_table_case cmp_jump_table_idx;
+	/**< Indicates which compare function to use. */
+	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
+						from hash signature. */
+	uint32_t key_entry_size;         /**< Size of each key entry. */
+
+	struct rte_ring *free_slots;    /**< Ring that stores all indexes
+						of the free slots in the key table */
+	void *key_store;                /**< Table storing all keys and data */
+	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
+							hash values and key indexes
+							to the key table*/
+	uint8_t hw_trans_mem_support;	/**< Hardware transactional
+							memory support */
+	struct lcore_cache *local_free_slots;
+	/**< Local cache per lcore, storing some indexes of the free slots */
+	enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+	rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+} __rte_cache_aligned;
+
+struct queue_node {
+	struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
+
+	struct queue_node *prev;     /* Parent(bucket) in search path */
+	int prev_slot;               /* Parent(slot) in search path */
+};
+
+#endif
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
new file mode 100644
index 00000000..fa5630b7
--- /dev/null
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -0,0 +1,193 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* rte_cuckoo_hash_x86.h
+ * This file holds all x86 specific Cuckoo Hash functions
+ */
+
+/* Only tries to insert at one bucket (@prim_bkt) without trying to push
+ * buckets around
+ */
+static inline unsigned
+rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
+		hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+	unsigned i, status;
+	unsigned try = 0;
+
+	while (try < RTE_HASH_TSX_MAX_RETRY) {
+		status = rte_xbegin();
+		if (likely(status == RTE_XBEGIN_STARTED)) {
+			/* Insert new entry if there is room in the primary
+			* bucket.
+			*/
+			for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+				/* Check if slot is available */
+				if (likely(prim_bkt->signatures[i].sig ==
+						NULL_SIGNATURE)) {
+					prim_bkt->signatures[i].current = sig;
+					prim_bkt->signatures[i].alt = alt_hash;
+					prim_bkt->key_idx[i] = new_idx;
+					break;
+				}
+			}
+			rte_xend();
+
+			if (i != RTE_HASH_BUCKET_ENTRIES)
+				return 0;
+
+			break; /* break off try loop if transaction commits */
+		} else {
+			/* If we abort we give up this cuckoo path. */
+			try++;
+			rte_pause();
+		}
+	}
+
+	return -1;
+}
+
+/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
+ * the path head with new entry (sig, alt_hash, new_idx)
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
+			struct queue_node *leaf, uint32_t leaf_slot,
+			hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
+{
+	unsigned try = 0;
+	unsigned status;
+	uint32_t prev_alt_bkt_idx;
+
+	struct queue_node *prev_node, *curr_node = leaf;
+	struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+	uint32_t prev_slot, curr_slot = leaf_slot;
+
+	while (try < RTE_HASH_TSX_MAX_RETRY) {
+		status = rte_xbegin();
+		if (likely(status == RTE_XBEGIN_STARTED)) {
+			while (likely(curr_node->prev != NULL)) {
+				prev_node = curr_node->prev;
+				prev_bkt = prev_node->bkt;
+				prev_slot = curr_node->prev_slot;
+
+				prev_alt_bkt_idx
+					= prev_bkt->signatures[prev_slot].alt
+					    & h->bucket_bitmask;
+
+				if (unlikely(&h->buckets[prev_alt_bkt_idx]
+					     != curr_bkt)) {
+					rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
+				}
+
+				/* Need to swap current/alt sig to allow later
+				 * Cuckoo insert to move elements back to its
+				 * primary bucket if available
+				 */
+				curr_bkt->signatures[curr_slot].alt =
+				    prev_bkt->signatures[prev_slot].current;
+				curr_bkt->signatures[curr_slot].current =
+				    prev_bkt->signatures[prev_slot].alt;
+				curr_bkt->key_idx[curr_slot]
+				    = prev_bkt->key_idx[prev_slot];
+
+				curr_slot = prev_slot;
+				curr_node = prev_node;
+				curr_bkt = curr_node->bkt;
+			}
+
+			curr_bkt->signatures[curr_slot].current = sig;
+			curr_bkt->signatures[curr_slot].alt = alt_hash;
+			curr_bkt->key_idx[curr_slot] = new_idx;
+
+			rte_xend();
+
+			return 0;
+		}
+
+		/* If we abort we give up this cuckoo path, since most likely it's
+		 * no longer valid as TSX detected data conflict
+		 */
+		try++;
+		rte_pause();
+	}
+
+	return -1;
+}
+
+/*
+ * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
+ * Cuckoo
+ */
+static inline int
+rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
+			struct rte_hash_bucket *bkt,
+			hash_sig_t sig, hash_sig_t alt_hash,
+			uint32_t new_idx)
+{
+	unsigned i;
+	struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+	struct queue_node *tail, *head;
+	struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+	tail = queue;
+	head = queue + 1;
+	tail->bkt = bkt;
+	tail->prev = NULL;
+	tail->prev_slot = -1;
+
+	/* Cuckoo bfs Search */
+	while (likely(tail != head && head <
+					queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) {
+		curr_bkt = tail->bkt;
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+				if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
+						tail, i, sig,
+						alt_hash, new_idx) == 0))
+					return 0;
+			}
+
+			/* Enqueue new node and keep prev node info */
+			alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+						    & h->bucket_bitmask]);
+			head->bkt = alt_bkt;
+			head->prev = tail;
+			head->prev_slot = i;
+			head++;
+		}
+		tail++;
+	}
+
+	return -ENOSPC;
+}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index ae00b658..c9612fbd 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -60,6 +60,9 @@ extern "C" {
 /** Enable Hardware transactional memory support. */
 #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01
 
+/** Default behavior of insertion, single writer/multi writer */
+#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+
 /** Signature of key that is stored internally. */
 typedef uint32_t hash_sig_t;
 
@@ -362,8 +365,6 @@ rte_hash_lookup_with_hash(const struct rte_hash *h,
 hash_sig_t
 rte_hash_hash(const struct rte_hash *h, const void *key);
 
-#define rte_hash_lookup_multi rte_hash_lookup_bulk
-#define rte_hash_lookup_multi_data rte_hash_lookup_bulk_data
 /**
  * Find multiple keys in the hash table.
  * This operation is multi-thread safe.
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
index 9d06780d..e97dfbd3 100644
--- a/lib/librte_ip_frag/Makefile
+++ b/lib/librte_ip_frag/Makefile
@@ -52,8 +52,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
 
-
-# this library depends on rte_ether
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h
index cde6ed4a..835e4f93 100644
--- a/lib/librte_ip_frag/ip_frag_common.h
+++ b/lib/librte_ip_frag/ip_frag_common.h
@@ -38,17 +38,9 @@
 
 /* logging macros. */
 #ifdef RTE_LIBRTE_IP_FRAG_DEBUG
-
 #define	IP_FRAG_LOG(lvl, fmt, args...)	RTE_LOG(lvl, USER1, fmt, ##args)
-
-#define	IP_FRAG_ASSERT(exp)					\
-if (!(exp))	{							\
-	rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n",	\
-		__func__, __LINE__);					\
-}
 #else
 #define	IP_FRAG_LOG(lvl, fmt, args...)	do {} while(0)
-#define IP_FRAG_ASSERT(exp)	do {} while (0)
 #endif /* IP_FRAG_DEBUG */
 
 #define IPV4_KEYLEN 1
@@ -76,8 +68,8 @@ struct ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
 	struct ip_frag_pkt **free, struct ip_frag_pkt **stale);
 
 /* these functions need to be declared here as ip_frag_process relies on them */
-struct rte_mbuf * ipv4_frag_reassemble(const struct ip_frag_pkt *fp);
-struct rte_mbuf * ipv6_frag_reassemble(const struct ip_frag_pkt *fp);
+struct rte_mbuf *ipv4_frag_reassemble(struct ip_frag_pkt *fp);
+struct rte_mbuf *ipv6_frag_reassemble(struct ip_frag_pkt *fp);
 
 
 
diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
index a4ed9238..a2259e80 100644
--- a/lib/librte_ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c
@@ -107,7 +107,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 	frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr));
 
 	/* Fragment size should be a multiply of 8. */
-	IP_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0);
+	RTE_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0);
 
 	in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *);
 	flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset);
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c
index 26d07f9a..e084ca59 100644
--- a/lib/librte_ip_frag/rte_ipv4_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -41,11 +41,12 @@
  * Reassemble fragments into one packet.
  */
 struct rte_mbuf *
-ipv4_frag_reassemble(const struct ip_frag_pkt *fp)
+ipv4_frag_reassemble(struct ip_frag_pkt *fp)
 {
 	struct ipv4_hdr *ip_hdr;
 	struct rte_mbuf *m, *prev;
 	uint32_t i, n, ofs, first_len;
+	uint32_t curr_idx = 0;
 
 	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
 	n = fp->last_idx - 1;
@@ -53,6 +54,7 @@ ipv4_frag_reassemble(const struct ip_frag_pkt *fp)
 	/*start from the last fragment. */
 	m = fp->frags[IP_LAST_FRAG_IDX].mb;
 	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
+	curr_idx = IP_LAST_FRAG_IDX;
 
 	while (ofs != first_len) {
 
@@ -67,6 +69,10 @@ ipv4_frag_reassemble(const struct ip_frag_pkt *fp)
 				rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
 				rte_pktmbuf_chain(fp->frags[i].mb, m);
 
+				/* this mbuf should not be accessed directly */
+				fp->frags[curr_idx].mb = NULL;
+				curr_idx = i;
+
 				/* update our last fragment and offset. */
 				m = fp->frags[i].mb;
 				ofs = fp->frags[i].ofs;
diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
index 1e30004f..db666bbf 100644
--- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
+++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
@@ -110,7 +110,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 	frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr));
 
 	/* Fragment size should be a multiple of 8. */
-	IP_FRAG_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0);
+	RTE_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0);
 
 	/* Check that pkts_out is big enough to hold all fragments */
 	if (unlikely (frag_size * nb_pkts_out <
diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
index d29cb1da..21a5ef5d 100644
--- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
@@ -59,13 +59,14 @@ ip_frag_memmove(char *dst, char *src, int len)
  * Reassemble fragments into one packet.
  */
 struct rte_mbuf *
-ipv6_frag_reassemble(const struct ip_frag_pkt *fp)
+ipv6_frag_reassemble(struct ip_frag_pkt *fp)
 {
 	struct ipv6_hdr *ip_hdr;
 	struct ipv6_extension_fragment *frag_hdr;
 	struct rte_mbuf *m, *prev;
 	uint32_t i, n, ofs, first_len;
 	uint32_t last_len, move_len, payload_len;
+	uint32_t curr_idx = 0;
 
 	first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
 	n = fp->last_idx - 1;
@@ -74,6 +75,7 @@ ipv6_frag_reassemble(const struct ip_frag_pkt *fp)
 	m = fp->frags[IP_LAST_FRAG_IDX].mb;
 	ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
 	last_len = fp->frags[IP_LAST_FRAG_IDX].len;
+	curr_idx = IP_LAST_FRAG_IDX;
 
 	payload_len = ofs + last_len;
 
@@ -90,6 +92,10 @@ ipv6_frag_reassemble(const struct ip_frag_pkt *fp)
 				rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
 				rte_pktmbuf_chain(fp->frags[i].mb, m);
 
+				/* this mbuf should not be accessed directly */
+				fp->frags[curr_idx].mb = NULL;
+				curr_idx = i;
+
 				/* update our last fragment and offset. */
 				m = fp->frags[i].mb;
 				ofs = fp->frags[i].ofs;
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
index 16defdba..c099438c 100644
--- a/lib/librte_ivshmem/Makefile
+++ b/lib/librte_ivshmem/Makefile
@@ -46,7 +46,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
 
-# this lib needs eal
+# this lib needs EAL, ring and mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_ring
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
index c8b332ce..c26edb61 100644
--- a/lib/librte_ivshmem/rte_ivshmem.c
+++ b/lib/librte_ivshmem/rte_ivshmem.c
@@ -548,26 +548,40 @@ add_ring_to_metadata(const struct rte_ring * r,
 }
 
 static int
-add_mempool_to_metadata(const struct rte_mempool * mp,
-		struct ivshmem_config * config)
+add_mempool_memzone_to_metadata(const void *addr,
+		struct ivshmem_config *config)
 {
-	struct rte_memzone * mz;
-	int ret;
+	struct rte_memzone *mz;
 
-	mz = get_memzone_by_addr(mp);
-	ret = 0;
+	mz = get_memzone_by_addr(addr);
 
 	if (!mz) {
 		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
 		return -1;
 	}
 
-	/* mempool consists of memzone and ring */
-	ret = add_memzone_to_metadata(mz, config);
+	return add_memzone_to_metadata(mz, config);
+}
+
+static int
+add_mempool_to_metadata(const struct rte_mempool *mp,
+		struct ivshmem_config *config)
+{
+	struct rte_mempool_memhdr *memhdr;
+	int ret;
+
+	ret = add_mempool_memzone_to_metadata(mp, config);
 	if (ret < 0)
 		return -1;
 
-	return add_ring_to_metadata(mp->ring, config);
+	STAILQ_FOREACH(memhdr, &mp->mem_list, next) {
+		ret = add_mempool_memzone_to_metadata(memhdr->addr, config);
+		if (ret < 0)
+			return -1;
+	}
+
+	/* mempool consists of memzone and ring */
+	return add_ring_to_metadata(mp->pool_data, config);
 }
 
 int
diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h
index c2b285f8..b3686030 100644
--- a/lib/librte_jobstats/rte_jobstats.h
+++ b/lib/librte_jobstats/rte_jobstats.h
@@ -85,7 +85,7 @@ struct rte_jobstats {
 	/**< Minimum execute time. */
 
 	uint64_t max_exec_time;
-	/**< Minimum execute time. */
+	/**< Maximum execute time. */
 
 	uint64_t exec_cnt;
 	/**< Execute count. */
diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
index 1398164d..09474461 100644
--- a/lib/librte_kni/Makefile
+++ b/lib/librte_kni/Makefile
@@ -46,8 +46,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_KNI) := rte_kni.c
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_KNI)-include := rte_kni.h
 
-# this lib needs eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_eal lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_mempool
 DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_ether
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index ea9baf4c..3028fd43 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -323,6 +323,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	char intf_name[RTE_KNI_NAMESIZE];
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
+	const struct rte_mempool *mp;
 	struct rte_kni_memzone_slot *slot = NULL;
 
 	if (!pktmbuf_pool || !conf || !conf->name[0])
@@ -415,12 +416,17 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 
 
 	/* MBUF mempool */
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
+	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
 		pktmbuf_pool->name);
 	mz = rte_memzone_lookup(mz_name);
 	KNI_MEM_CHECK(mz == NULL);
-	dev_info.mbuf_va = mz->addr;
-	dev_info.mbuf_phys = mz->phys_addr;
+	mp = (struct rte_mempool *)mz->addr;
+	/* KNI currently requires to have only one memory chunk */
+	if (mp->nb_mem_chunks != 1)
+		goto kni_fail;
+
+	dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
+	dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
 	ctx->pktmbuf_pool = pktmbuf_pool;
 	ctx->group_id = conf->group_id;
 	ctx->slot_id = slot->id;
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index 9899a170..7363e6cf 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -113,6 +113,9 @@ void rte_kni_init(unsigned int max_kni_ifaces);
  * The rte_kni_alloc shall not be called before rte_kni_init() has been
  * called. rte_kni_alloc is thread safe.
  *
+ * The mempool should have capacity of more than "2 x KNI_FIFO_COUNT_MAX"
+ * elements for each KNI interface allocated.
+ *
  * @param pktmbuf_pool
  *  The mempool for allocting mbufs for packets.
  * @param conf
@@ -160,8 +163,8 @@ int rte_kni_handle_request(struct rte_kni *kni);
 /**
  * Retrieve a burst of packets from a KNI interface. The retrieved packets are
  * stored in rte_mbuf structures whose pointers are supplied in the array of
- * mbufs, and the maximum number is indicated by num. It handles the freeing of
- * the mbufs in the free queue of KNI interface.
+ * mbufs, and the maximum number is indicated by num. It handles allocating
+ * the mbufs for KNI interface alloc queue.
  *
  * @param kni
  *  The KNI interface context.
@@ -179,8 +182,8 @@ unsigned rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs,
 /**
  * Send a burst of packets to a KNI interface. The packets to be sent out are
  * stored in rte_mbuf structures whose pointers are supplied in the array of
- * mbufs, and the maximum number is indicated by num. It handles allocating
- * the mbufs for KNI interface alloc queue.
+ * mbufs, and the maximum number is indicated by num. It handles the freeing of
+ * the mbufs in the free queue of KNI interface.
  *
  * @param kni
  *  The KNI interface context.
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 8bdf6065..6f65d1c2 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -373,7 +373,6 @@ rte_lpm_free_v20(struct rte_lpm_v20 *lpm)
 
 	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-	rte_free(lpm->rules_tbl);
 	rte_free(lpm);
 	rte_free(te);
 }
diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index ba4353ce..32fdba01 100644
--- a/lib/librte_lpm/rte_lpm6.c
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -601,7 +601,7 @@ int
 rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
 {
 	const struct rte_lpm6_tbl_entry *tbl;
-	const struct rte_lpm6_tbl_entry *tbl_next;
+	const struct rte_lpm6_tbl_entry *tbl_next = NULL;
 	int status;
 	uint8_t first_byte;
 	uint32_t tbl24_index;
@@ -636,7 +636,7 @@ rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
 {
 	unsigned i;
 	const struct rte_lpm6_tbl_entry *tbl;
-	const struct rte_lpm6_tbl_entry *tbl_next;
+	const struct rte_lpm6_tbl_entry *tbl_next = NULL;
 	uint32_t tbl24_index;
 	uint8_t first_byte, next_hop;
 	int status;
diff --git a/lib/librte_lpm/rte_lpm6.h b/lib/librte_lpm/rte_lpm6.h
index cedcea8d..13d027f9 100644
--- a/lib/librte_lpm/rte_lpm6.h
+++ b/lib/librte_lpm/rte_lpm6.h
@@ -38,6 +38,8 @@
  * RTE Longest Prefix Match for IPv6 (LPM6)
  */
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index dc0467c9..601e5282 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -86,7 +86,7 @@ rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
 	struct rte_pktmbuf_pool_private default_mbp_priv;
 	uint16_t roomsz;
 
-	RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
+	RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
 
 	/* if no structure is provided, assume no mbuf private area */
 	user_mbp_priv = opaque_arg;
@@ -100,7 +100,7 @@ rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
 		user_mbp_priv = &default_mbp_priv;
 	}
 
-	RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) +
+	RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) +
 		user_mbp_priv->mbuf_data_room_size +
 		user_mbp_priv->mbuf_priv_size);
 
@@ -126,9 +126,9 @@ rte_pktmbuf_init(struct rte_mempool *mp,
 	mbuf_size = sizeof(struct rte_mbuf) + priv_size;
 	buf_len = rte_pktmbuf_data_room_size(mp);
 
-	RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size);
-	RTE_MBUF_ASSERT(mp->elt_size >= mbuf_size);
-	RTE_MBUF_ASSERT(buf_len <= UINT16_MAX);
+	RTE_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size);
+	RTE_ASSERT(mp->elt_size >= mbuf_size);
+	RTE_ASSERT(buf_len <= UINT16_MAX);
 
 	memset(m, 0, mp->elt_size);
 
@@ -153,6 +153,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
 	unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
 	int socket_id)
 {
+	struct rte_mempool *mp;
 	struct rte_pktmbuf_pool_private mbp_priv;
 	unsigned elt_size;
 
@@ -167,10 +168,27 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
 	mbp_priv.mbuf_data_room_size = data_room_size;
 	mbp_priv.mbuf_priv_size = priv_size;
 
-	return rte_mempool_create(name, n, elt_size,
-		cache_size, sizeof(struct rte_pktmbuf_pool_private),
-		rte_pktmbuf_pool_init, &mbp_priv, rte_pktmbuf_init, NULL,
-		socket_id, 0);
+	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
+		 sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
+	if (mp == NULL)
+		return NULL;
+
+	rte_errno = rte_mempool_set_ops_byname(mp,
+			RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
+	if (rte_errno != 0) {
+		RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
+		return NULL;
+	}
+	rte_pktmbuf_pool_init(mp, &mbp_priv);
+
+	if (rte_mempool_populate_default(mp) < 0) {
+		rte_mempool_free(mp);
+		return NULL;
+	}
+
+	rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
+
+	return mp;
 }
 
 /* do some sanity checks on a mbuf: panic if it fails */
@@ -218,7 +236,7 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 
 	__rte_mbuf_sanity_check(m, 1);
 
-	fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n",
+	fprintf(f, "dump mbuf at %p, phys=%"PRIx64", buf_len=%u\n",
 	       m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
 	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
 	       "in_port=%u\n", m->pkt_len, m->ol_flags,
@@ -228,7 +246,7 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 	while (m && nb_segs != 0) {
 		__rte_mbuf_sanity_check(m, 0);
 
-		fprintf(f, "  segment at 0x%p, data=0x%p, data_len=%u\n",
+		fprintf(f, "  segment at %p, data=%p, data_len=%u\n",
 			m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len);
 		len = dump_len;
 		if (len > m->data_len)
@@ -254,12 +272,10 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
 	case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
 	case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
 	case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
-	/* case PKT_RX_OVERSIZE: return "PKT_RX_OVERSIZE"; */
-	/* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */
-	/* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */
-	/* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */
+	case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
 	case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
 	case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+	case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
 	default: return NULL;
 	}
 }
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 75a227d8..101485fb 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -65,10 +65,6 @@
 extern "C" {
 #endif
 
-/* deprecated options */
-#pragma GCC poison RTE_MBUF_SCATTER_GATHER
-#pragma GCC poison RTE_MBUF_REFCNT
-
 /*
  * Packet Offload Features Flags. It also carry packet type information.
  * Critical resources. Both rx/tx shared these bits. Be cautious on any change
@@ -83,21 +79,51 @@ extern "C" {
  * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
  * rte_get_tx_ol_flag_name().
  */
-#define PKT_RX_VLAN_PKT      (1ULL << 0)  /**< RX packet is a 802.1q VLAN packet. */
+
+/**
+ * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
+ * the packet is recognized as a VLAN, but the behavior between PMDs
+ * was not the same. This flag is kept for some time to avoid breaking
+ * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
+ */
+#define PKT_RX_VLAN_PKT      (1ULL << 0)
+
 #define PKT_RX_RSS_HASH      (1ULL << 1)  /**< RX packet with RSS hash result. */
 #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
 #define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
 #define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)  /**< External IP header checksum error. */
-#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
-#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
-#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
-#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
+
+/**
+ * A vlan has been stripped by the hardware and its tci is saved in
+ * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
+ * in the RX configuration of the PMD.
+ */
+#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
+
+/* hole, some bits can be reused here  */
+
 #define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
 #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
 #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
-#define PKT_RX_QINQ_PKT      (1ULL << 15)  /**< RX packet with double VLAN stripped. */
+
+/**
+ * The 2 vlans have been stripped by the hardware and their tci are
+ * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
+ * This can only happen if vlan stripping is enabled in the RX
+ * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
+ * must also be set.
+ */
+#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
+
+/**
+ * Deprecated.
+ * RX packet with double VLAN stripped.
+ * This flag is replaced by PKT_RX_QINQ_STRIPPED.
+ */
+#define PKT_RX_QINQ_PKT      PKT_RX_QINQ_STRIPPED
+
 /* add new RX flags here */
 
 /* add new TX flags here */
@@ -278,6 +304,13 @@ extern "C" {
  */
 #define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
 /**
+ * NSH (Network Service Header) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x894F>
+ */
+#define RTE_PTYPE_L2_ETHER_NSH              0x00000005
+/**
  * Mask of layer 2 packet types.
  * It is used for outer packet for tunneling cases.
  */
@@ -765,7 +798,10 @@ struct rte_mbuf {
 
 	/*
 	 * The packet type, which is the combination of outer/inner L2, L3, L4
-	 * and tunnel types.
+	 * and tunnel types. The packet_type is about data really present in the
+	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
+	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
+	 * vlan is stripped from the data.
 	 */
 	union {
 		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
@@ -782,7 +818,8 @@ struct rte_mbuf {
 
 	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
 	uint16_t data_len;        /**< Amount of data in segment buffer. */
-	uint16_t vlan_tci;        /**< VLAN Tag Control Identifier (CPU order) */
+	/** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
+	uint16_t vlan_tci;
 
 	union {
 		uint32_t rss;     /**< RSS hash result if RSS enabled */
@@ -808,7 +845,8 @@ struct rte_mbuf {
 
 	uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
 
-	uint16_t vlan_tci_outer;  /**< Outer VLAN Tag Control Identifier (CPU order) */
+	/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
+	uint16_t vlan_tci_outer;
 
 	/* second cache line - fields only used in slow path or on TX */
 	MARKER cacheline1 __rte_cache_min_aligned;
@@ -846,6 +884,44 @@ struct rte_mbuf {
 	uint16_t timesync;
 } __rte_cache_aligned;
 
+/**
+ * Prefetch the first part of the mbuf
+ *
+ * The first 64 bytes of the mbuf corresponds to fields that are used early
+ * in the receive path. If the cache line of the architecture is higher than
+ * 64B, the second part will also be prefetched.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part1(struct rte_mbuf *m)
+{
+	rte_prefetch0(&m->cacheline0);
+}
+
+/**
+ * Prefetch the second part of the mbuf
+ *
+ * The next 64 bytes of the mbuf corresponds to fields that are used in the
+ * transmit path. If the cache line of the architecture is higher than 64B,
+ * this function does nothing as it is expected that the full mbuf is
+ * already in cache.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part2(struct rte_mbuf *m)
+{
+#if RTE_CACHE_LINE_SIZE == 64
+	rte_prefetch0(&m->cacheline1);
+#else
+	RTE_SET_USED(m);
+#endif
+}
+
+
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
 /**
@@ -936,29 +1012,11 @@ struct rte_pktmbuf_pool_private {
 /**  check mbuf type in debug mode */
 #define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h)
 
-/**  check mbuf type in debug mode if mbuf pointer is not null */
-#define __rte_mbuf_sanity_check_raw(m, is_h)	do {       \
-	if ((m) != NULL)                                   \
-		rte_mbuf_sanity_check(m, is_h);          \
-} while (0)
-
-/**  MBUF asserts in debug mode */
-#define RTE_MBUF_ASSERT(exp)                                         \
-if (!(exp)) {                                                        \
-	rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
-}
-
 #else /*  RTE_LIBRTE_MBUF_DEBUG */
 
 /**  check mbuf type in debug mode */
 #define __rte_mbuf_sanity_check(m, is_h) do { } while (0)
 
-/**  check mbuf type in debug mode if mbuf pointer is not null */
-#define __rte_mbuf_sanity_check_raw(m, is_h) do { } while (0)
-
-/**  MBUF asserts in debug mode */
-#define RTE_MBUF_ASSERT(exp)                do { } while (0)
-
 #endif /*  RTE_LIBRTE_MBUF_DEBUG */
 
 #ifdef RTE_MBUF_REFCNT_ATOMIC
@@ -1071,9 +1129,12 @@ void
 rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
 
 /**
- * @internal Allocate a new mbuf from mempool *mp*.
- * The use of that function is reserved for RTE internal needs.
- * Please use rte_pktmbuf_alloc().
+ * Allocate an unitialized mbuf from mempool *mp*.
+ *
+ * This function can be used by PMDs (especially in RX functions) to
+ * allocate an unitialized mbuf. The driver is responsible of
+ * initializing all the required fields. See rte_pktmbuf_reset().
+ * For standard needs, prefer rte_pktmbuf_alloc().
  *
  * @param mp
  *   The mempool from which mbuf is allocated.
@@ -1081,18 +1142,28 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
  *   - The pointer to the new mbuf on success.
  *   - NULL if allocation failed.
  */
-static inline struct rte_mbuf *__rte_mbuf_raw_alloc(struct rte_mempool *mp)
+static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 {
 	struct rte_mbuf *m;
 	void *mb = NULL;
+
 	if (rte_mempool_get(mp, &mb) < 0)
 		return NULL;
 	m = (struct rte_mbuf *)mb;
-	RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
+	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
 	rte_mbuf_refcnt_set(m, 1);
+	__rte_mbuf_sanity_check(m, 0);
+
 	return m;
 }
 
+/* compat with older versions */
+__rte_deprecated static inline struct rte_mbuf *
+__rte_mbuf_raw_alloc(struct rte_mempool *mp)
+{
+	return rte_mbuf_raw_alloc(mp);
+}
+
 /**
  * @internal Put mbuf back into its original mempool.
  * The use of that function is reserved for RTE internal needs.
@@ -1104,7 +1175,7 @@ static inline struct rte_mbuf *__rte_mbuf_raw_alloc(struct rte_mempool *mp)
 static inline void __attribute__((always_inline))
 __rte_mbuf_raw_free(struct rte_mbuf *m)
 {
-	RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0);
+	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
 	rte_mempool_put(m->pool, m);
 }
 
@@ -1356,7 +1427,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)
 {
 	struct rte_mbuf *m;
-	if ((m = __rte_mbuf_raw_alloc(mp)) != NULL)
+	if ((m = rte_mbuf_raw_alloc(mp)) != NULL)
 		rte_pktmbuf_reset(m);
 	return m;
 }
@@ -1392,22 +1463,22 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
 	switch (count % 4) {
 	case 0:
 		while (idx != count) {
-			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
 			rte_mbuf_refcnt_set(mbufs[idx], 1);
 			rte_pktmbuf_reset(mbufs[idx]);
 			idx++;
 	case 3:
-			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
 			rte_mbuf_refcnt_set(mbufs[idx], 1);
 			rte_pktmbuf_reset(mbufs[idx]);
 			idx++;
 	case 2:
-			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
 			rte_mbuf_refcnt_set(mbufs[idx], 1);
 			rte_pktmbuf_reset(mbufs[idx]);
 			idx++;
 	case 1:
-			RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
+			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
 			rte_mbuf_refcnt_set(mbufs[idx], 1);
 			rte_pktmbuf_reset(mbufs[idx]);
 			idx++;
@@ -1421,6 +1492,8 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
  *
  * After attachment we refer the mbuf we attached as 'indirect',
  * while mbuf we attached to as 'direct'.
+ * The direct mbuf's reference counter is incremented.
+ *
  * Right now, not supported:
  *  - attachment for already indirect mbuf (e.g. - mi has to be direct).
  *  - mbuf we trying to attach (mi) is used by someone else
@@ -1435,7 +1508,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 {
 	struct rte_mbuf *md;
 
-	RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(mi) &&
+	RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
 	    rte_mbuf_refcnt_read(mi) == 1);
 
 	/* if m is not direct, get the mbuf that embeds the data */
@@ -1474,13 +1547,17 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
  *
  *  - restore original mbuf address and length values.
  *  - reset pktmbuf data and data_len to their default values.
- *  All other fields of the given packet mbuf will be left intact.
+ *  - decrement the direct mbuf's reference counter. When the
+ *  reference counter becomes 0, the direct mbuf is freed.
+ *
+ * All other fields of the given packet mbuf will be left intact.
  *
  * @param m
  *   The indirect attached packet mbuf.
  */
 static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 {
+	struct rte_mbuf *md = rte_mbuf_from_indirect(m);
 	struct rte_mempool *mp = m->pool;
 	uint32_t mbuf_size, buf_len, priv_size;
 
@@ -1495,6 +1572,9 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
 	m->data_len = 0;
 	m->ol_flags = 0;
+
+	if (rte_mbuf_refcnt_update(md, -1) == 0)
+		__rte_mbuf_raw_free(md);
 }
 
 static inline struct rte_mbuf* __attribute__((always_inline))
@@ -1503,17 +1583,9 @@ __rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
 	__rte_mbuf_sanity_check(m, 0);
 
 	if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) {
-
-		/* if this is an indirect mbuf, then
-		 *  - detach mbuf
-		 *  - free attached mbuf segment
-		 */
-		if (RTE_MBUF_INDIRECT(m)) {
-			struct rte_mbuf *md = rte_mbuf_from_indirect(m);
+		/* if this is an indirect mbuf, it is detached. */
+		if (RTE_MBUF_INDIRECT(m))
 			rte_pktmbuf_detach(m);
-			if (rte_mbuf_refcnt_update(md, -1) == 0)
-				__rte_mbuf_raw_free(md);
-		}
 		return m;
 	}
 	return NULL;
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index a6898eff..057a6ab4 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -38,13 +38,13 @@ CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 
 EXPORT_MAP := rte_mempool_version.map
 
-LIBABIVER := 1
+LIBABIVER := 2
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool.c
-ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
-SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_dom0_mempool.c
-endif
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ops.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_ring.c
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) +=  rte_mempool_stack.c
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
 
diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c
deleted file mode 100644
index 0d6d7504..00000000
--- a/lib/librte_mempool/rte_dom0_mempool.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <stdarg.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <sys/queue.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_debug.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_atomic.h>
-#include <rte_launch.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_per_lcore.h>
-#include <rte_lcore.h>
-#include <rte_branch_prediction.h>
-#include <rte_ring.h>
-#include <rte_errno.h>
-#include <rte_string_fns.h>
-#include <rte_spinlock.h>
-
-#include "rte_mempool.h"
-
-static void
-get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
-	uint32_t pg_sz, uint32_t memseg_id)
-{
-	uint32_t i;
-	uint64_t virt_addr, mfn_id;
-	struct rte_mem_config *mcfg;
-	uint32_t page_size = getpagesize();
-
-	/* get pointer to global configuration */
-	mcfg = rte_eal_get_configuration()->mem_config;
-	virt_addr = (uintptr_t) mcfg->memseg[memseg_id].addr;
-
-	for (i = 0; i != pg_num; i++) {
-		mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
-		pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
-	}
-}
-
-/* create the mempool for supporting Dom0 */
-struct rte_mempool *
-rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size,
-	unsigned cache_size, unsigned private_data_size,
-	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
-	int socket_id, unsigned flags)
-{
-	struct rte_mempool *mp = NULL;
-	phys_addr_t *pa;
-	char *va;
-	size_t sz;
-	uint32_t pg_num, pg_shift, pg_sz, total_size;
-	const struct rte_memzone *mz;
-	char mz_name[RTE_MEMZONE_NAMESIZE];
-	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
-
-	pg_sz = RTE_PGSIZE_2M;
-
-	pg_shift = rte_bsf32(pg_sz);
-	total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
-
-	/* calc max memory size and max number of pages needed. */
-	sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) +
-		RTE_PGSIZE_2M;
-	pg_num = sz >> pg_shift;
-
-	/* extract physical mappings of the allocated memory. */
-	pa = calloc(pg_num, sizeof (*pa));
-	if (pa == NULL)
-		return mp;
-
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
-	mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
-	if (mz == NULL) {
-		free(pa);
-		return mp;
-	}
-
-	va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M);
-	/* extract physical mappings of the allocated memory. */
-	get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id);
-
-	mp = rte_mempool_xmem_create(name, elt_num, elt_size,
-		cache_size, private_data_size,
-		mp_init, mp_init_arg,
-		obj_init, obj_init_arg,
-		socket_id, flags, va, pa, pg_num, pg_shift);
-
-	free(pa);
-
-	return mp;
-}
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e17..d78d02b7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -39,6 +40,7 @@
 #include <inttypes.h>
 #include <errno.h>
 #include <sys/queue.h>
+#include <sys/mman.h>
 
 #include <rte_common.h>
 #include <rte_log.h>
@@ -127,127 +129,63 @@ static unsigned optimize_object_size(unsigned obj_size)
 }
 
 static void
-mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx,
-	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+mempool_add_elem(struct rte_mempool *mp, void *obj, phys_addr_t physaddr)
 {
 	struct rte_mempool_objhdr *hdr;
 	struct rte_mempool_objtlr *tlr __rte_unused;
 
-	obj = (char *)obj + mp->header_size;
-
 	/* set mempool ptr in header */
 	hdr = RTE_PTR_SUB(obj, sizeof(*hdr));
 	hdr->mp = mp;
+	hdr->physaddr = physaddr;
+	STAILQ_INSERT_TAIL(&mp->elt_list, hdr, next);
+	mp->populated_size++;
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
 	tlr = __mempool_get_trailer(obj);
 	tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE;
 #endif
-	/* call the initializer */
-	if (obj_init)
-		obj_init(mp, obj_init_arg, obj, obj_idx);
 
 	/* enqueue in ring */
-	rte_ring_sp_enqueue(mp->ring, obj);
+	rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
 }
 
+/* call obj_cb() for each mempool element */
 uint32_t
-rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align,
-	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
-	rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg)
+rte_mempool_obj_iter(struct rte_mempool *mp,
+	rte_mempool_obj_cb_t *obj_cb, void *obj_cb_arg)
 {
-	uint32_t i, j, k;
-	uint32_t pgn, pgf;
-	uintptr_t end, start, va;
-	uintptr_t pg_sz;
-
-	pg_sz = (uintptr_t)1 << pg_shift;
-	va = (uintptr_t)vaddr;
-
-	i = 0;
-	j = 0;
-
-	while (i != elt_num && j != pg_num) {
-
-		start = RTE_ALIGN_CEIL(va, align);
-		end = start + elt_sz;
-
-		/* index of the first page for the next element. */
-		pgf = (end >> pg_shift) - (start >> pg_shift);
-
-		/* index of the last page for the current element. */
-		pgn = ((end - 1) >> pg_shift) - (start >> pg_shift);
-		pgn += j;
-
-		/* do we have enough space left for the element. */
-		if (pgn >= pg_num)
-			break;
-
-		for (k = j;
-				k != pgn &&
-				paddr[k] + pg_sz == paddr[k + 1];
-				k++)
-			;
+	struct rte_mempool_objhdr *hdr;
+	void *obj;
+	unsigned n = 0;
 
-		/*
-		 * if next pgn chunks of memory physically continuous,
-		 * use it to create next element.
-		 * otherwise, just skip that chunk unused.
-		 */
-		if (k == pgn) {
-			if (obj_iter != NULL)
-				obj_iter(obj_iter_arg, (void *)start,
-					(void *)end, i);
-			va = end;
-			j += pgf;
-			i++;
-		} else {
-			va = RTE_ALIGN_CEIL((va + 1), pg_sz);
-			j++;
-		}
+	STAILQ_FOREACH(hdr, &mp->elt_list, next) {
+		obj = (char *)hdr + sizeof(*hdr);
+		obj_cb(mp, obj_cb_arg, obj, n);
+		n++;
 	}
 
-	return i;
-}
-
-/*
- * Populate  mempool with the objects.
- */
-
-struct mempool_populate_arg {
-	struct rte_mempool     *mp;
-	rte_mempool_obj_ctor_t *obj_init;
-	void                   *obj_init_arg;
-};
-
-static void
-mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx)
-{
-	struct mempool_populate_arg *pa = arg;
-
-	mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg);
-	pa->mp->elt_va_end = (uintptr_t)end;
+	return n;
 }
 
-static void
-mempool_populate(struct rte_mempool *mp, size_t num, size_t align,
-	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+/* call mem_cb() for each mempool memory chunk */
+uint32_t
+rte_mempool_mem_iter(struct rte_mempool *mp,
+	rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg)
 {
-	uint32_t elt_sz;
-	struct mempool_populate_arg arg;
+	struct rte_mempool_memhdr *hdr;
+	unsigned n = 0;
 
-	elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
-	arg.mp = mp;
-	arg.obj_init = obj_init;
-	arg.obj_init_arg = obj_init_arg;
+	STAILQ_FOREACH(hdr, &mp->mem_list, next) {
+		mem_cb(mp, mem_cb_arg, hdr, n);
+		n++;
+	}
 
-	mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start,
-		num, elt_sz, align,
-		mp->elt_pa, mp->pg_num, mp->pg_shift,
-		mempool_obj_populate, &arg);
+	return n;
 }
 
+/* get the header, trailer and total size of a mempool element. */
 uint32_t
 rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
 	struct rte_mempool_objsz *sz)
@@ -256,24 +194,13 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
 
 	sz = (sz != NULL) ? sz : &lsz;
 
-	/*
-	 * In header, we have at least the pointer to the pool, and
-	 * optionaly a 64 bits cookie.
-	 */
-	sz->header_size = 0;
-	sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-	sz->header_size += sizeof(uint64_t); /* cookie */
-#endif
+	sz->header_size = sizeof(struct rte_mempool_objhdr);
 	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
 		sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
 			RTE_MEMPOOL_ALIGN);
 
-	/* trailer contains the cookie in debug mode */
-	sz->trailer_size = 0;
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-	sz->trailer_size += sizeof(uint64_t); /* cookie */
-#endif
+	sz->trailer_size = sizeof(struct rte_mempool_objtlr);
+
 	/* element size is 8 bytes-aligned at least */
 	sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));
 
@@ -297,23 +224,6 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
 		sz->trailer_size = new_size - sz->header_size - sz->elt_size;
 	}
 
-	if (! rte_eal_has_hugepages()) {
-		/*
-		 * compute trailer size so that pool elements fit exactly in
-		 * a standard page
-		 */
-		int page_size = getpagesize();
-		int new_size = page_size - sz->header_size - sz->elt_size;
-		if (new_size < 0 || (unsigned int)new_size < sz->trailer_size) {
-			printf("When hugepages are disabled, pool objects "
-			       "can't exceed PAGE_SIZE: %d + %d + %d > %d\n",
-			       sz->header_size, sz->elt_size, sz->trailer_size,
-			       page_size);
-			return 0;
-		}
-		sz->trailer_size = new_size;
-	}
-
 	/* this is the size of an object, including header and trailer */
 	sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;
 
@@ -325,139 +235,514 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
  * Calculate maximum amount of memory required to store given number of objects.
  */
 size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift)
+rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift)
 {
-	size_t n, pg_num, pg_sz, sz;
+	size_t obj_per_page, pg_num, pg_sz;
 
-	pg_sz = (size_t)1 << pg_shift;
+	if (total_elt_sz == 0)
+		return 0;
 
-	if ((n = pg_sz / elt_sz) > 0) {
-		pg_num = (elt_num + n - 1) / n;
-		sz = pg_num << pg_shift;
-	} else {
-		sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num;
-	}
+	if (pg_shift == 0)
+		return total_elt_sz * elt_num;
 
-	return sz;
+	pg_sz = (size_t)1 << pg_shift;
+	obj_per_page = pg_sz / total_elt_sz;
+	if (obj_per_page == 0)
+		return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;
+
+	pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
+	return pg_num << pg_shift;
 }
 
 /*
  * Calculate how much memory would be actually required with the
  * given memory footprint to store required number of elements.
  */
+ssize_t
+rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
+	size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
+	uint32_t pg_shift)
+{
+	uint32_t elt_cnt = 0;
+	phys_addr_t start, end;
+	uint32_t paddr_idx;
+	size_t pg_sz = (size_t)1 << pg_shift;
+
+	/* if paddr is NULL, assume contiguous memory */
+	if (paddr == NULL) {
+		start = 0;
+		end = pg_sz * pg_num;
+		paddr_idx = pg_num;
+	} else {
+		start = paddr[0];
+		end = paddr[0] + pg_sz;
+		paddr_idx = 1;
+	}
+	while (elt_cnt < elt_num) {
+
+		if (end - start >= total_elt_sz) {
+			/* enough contiguous memory, add an object */
+			start += total_elt_sz;
+			elt_cnt++;
+		} else if (paddr_idx < pg_num) {
+			/* no room to store one obj, add a page */
+			if (end == paddr[paddr_idx]) {
+				end += pg_sz;
+			} else {
+				start = paddr[paddr_idx];
+				end = paddr[paddr_idx] + pg_sz;
+			}
+			paddr_idx++;
+
+		} else {
+			/* no more page, return how many elements fit */
+			return -(size_t)elt_cnt;
+		}
+	}
+
+	return (size_t)paddr_idx << pg_shift;
+}
+
+/* free a memchunk allocated with rte_memzone_reserve() */
 static void
-mempool_lelem_iter(void *arg, __rte_unused void *start, void *end,
-	__rte_unused uint32_t idx)
+rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
+	void *opaque)
 {
-	*(uintptr_t *)arg = (uintptr_t)end;
+	const struct rte_memzone *mz = opaque;
+	rte_memzone_free(mz);
 }
 
-ssize_t
-rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
-	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+/* Free memory chunks used by a mempool. Objects must be in pool */
+static void
+rte_mempool_free_memchunks(struct rte_mempool *mp)
 {
-	uint32_t n;
-	uintptr_t va, uv;
-	size_t pg_sz, usz;
+	struct rte_mempool_memhdr *memhdr;
+	void *elt;
+
+	while (!STAILQ_EMPTY(&mp->elt_list)) {
+		rte_mempool_ops_dequeue_bulk(mp, &elt, 1);
+		(void)elt;
+		STAILQ_REMOVE_HEAD(&mp->elt_list, next);
+		mp->populated_size--;
+	}
 
-	pg_sz = (size_t)1 << pg_shift;
-	va = (uintptr_t)vaddr;
-	uv = va;
+	while (!STAILQ_EMPTY(&mp->mem_list)) {
+		memhdr = STAILQ_FIRST(&mp->mem_list);
+		STAILQ_REMOVE_HEAD(&mp->mem_list, next);
+		if (memhdr->free_cb != NULL)
+			memhdr->free_cb(memhdr, memhdr->opaque);
+		rte_free(memhdr);
+		mp->nb_mem_chunks--;
+	}
+}
+
+/* Add objects in the pool, using a physically contiguous memory
+ * zone. Return the number of objects added, or a negative value
+ * on error.
+ */
+int
+rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
+	phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
+	void *opaque)
+{
+	unsigned total_elt_sz;
+	unsigned i = 0;
+	size_t off;
+	struct rte_mempool_memhdr *memhdr;
+	int ret;
+
+	/* create the internal ring if not already done */
+	if ((mp->flags & MEMPOOL_F_POOL_CREATED) == 0) {
+		ret = rte_mempool_ops_alloc(mp);
+		if (ret != 0)
+			return ret;
+		mp->flags |= MEMPOOL_F_POOL_CREATED;
+	}
+
+	/* mempool is already populated */
+	if (mp->populated_size >= mp->size)
+		return -ENOSPC;
+
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
 
-	if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1,
-			paddr, pg_num, pg_shift, mempool_lelem_iter,
-			&uv)) != elt_num) {
-		return -(ssize_t)n;
+	memhdr = rte_zmalloc("MEMPOOL_MEMHDR", sizeof(*memhdr), 0);
+	if (memhdr == NULL)
+		return -ENOMEM;
+
+	memhdr->mp = mp;
+	memhdr->addr = vaddr;
+	memhdr->phys_addr = paddr;
+	memhdr->len = len;
+	memhdr->free_cb = free_cb;
+	memhdr->opaque = opaque;
+
+	if (mp->flags & MEMPOOL_F_NO_CACHE_ALIGN)
+		off = RTE_PTR_ALIGN_CEIL(vaddr, 8) - vaddr;
+	else
+		off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_CACHE_LINE_SIZE) - vaddr;
+
+	while (off + total_elt_sz <= len && mp->populated_size < mp->size) {
+		off += mp->header_size;
+		if (paddr == RTE_BAD_PHYS_ADDR)
+			mempool_add_elem(mp, (char *)vaddr + off,
+				RTE_BAD_PHYS_ADDR);
+		else
+			mempool_add_elem(mp, (char *)vaddr + off, paddr + off);
+		off += mp->elt_size + mp->trailer_size;
+		i++;
 	}
 
-	uv = RTE_ALIGN_CEIL(uv, pg_sz);
-	usz = uv - va;
-	return usz;
+	/* not enough room to store one object */
+	if (i == 0)
+		return -EINVAL;
+
+	STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next);
+	mp->nb_mem_chunks++;
+	return i;
 }
 
-#ifndef RTE_LIBRTE_XEN_DOM0
-/* stub if DOM0 support not configured */
-struct rte_mempool *
-rte_dom0_mempool_create(const char *name __rte_unused,
-			unsigned n __rte_unused,
-			unsigned elt_size __rte_unused,
-			unsigned cache_size __rte_unused,
-			unsigned private_data_size __rte_unused,
-			rte_mempool_ctor_t *mp_init __rte_unused,
-			void *mp_init_arg __rte_unused,
-			rte_mempool_obj_ctor_t *obj_init __rte_unused,
-			void *obj_init_arg __rte_unused,
-			int socket_id __rte_unused,
-			unsigned flags __rte_unused)
-{
-	rte_errno = EINVAL;
-	return NULL;
+/* Add objects in the pool, using a table of physical pages. Return the
+ * number of objects added, or a negative value on error.
+ */
+int
+rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+	rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
+{
+	uint32_t i, n;
+	int ret, cnt = 0;
+	size_t pg_sz = (size_t)1 << pg_shift;
+
+	/* mempool must not be populated */
+	if (mp->nb_mem_chunks != 0)
+		return -EEXIST;
+
+	if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+		return rte_mempool_populate_phys(mp, vaddr, RTE_BAD_PHYS_ADDR,
+			pg_num * pg_sz, free_cb, opaque);
+
+	for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {
+
+		/* populate with the largest group of contiguous pages */
+		for (n = 1; (i + n) < pg_num &&
+			     paddr[i] + pg_sz == paddr[i+n]; n++)
+			;
+
+		ret = rte_mempool_populate_phys(mp, vaddr + i * pg_sz,
+			paddr[i], n * pg_sz, free_cb, opaque);
+		if (ret < 0) {
+			rte_mempool_free_memchunks(mp);
+			return ret;
+		}
+		/* no need to call the free callback for next chunks */
+		free_cb = NULL;
+		cnt += ret;
+	}
+	return cnt;
 }
-#endif
 
-/* create the mempool */
-struct rte_mempool *
-rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
-		   unsigned cache_size, unsigned private_data_size,
-		   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-		   rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
-		   int socket_id, unsigned flags)
-{
-	if (rte_xen_dom0_supported())
-		return rte_dom0_mempool_create(name, n, elt_size,
-					       cache_size, private_data_size,
-					       mp_init, mp_init_arg,
-					       obj_init, obj_init_arg,
-					       socket_id, flags);
-	else
-		return rte_mempool_xmem_create(name, n, elt_size,
-					       cache_size, private_data_size,
-					       mp_init, mp_init_arg,
-					       obj_init, obj_init_arg,
-					       socket_id, flags,
-					       NULL, NULL, MEMPOOL_PG_NUM_DEFAULT,
-					       MEMPOOL_PG_SHIFT_MAX);
+/* Populate the mempool with a virtual area. Return the number of
+ * objects added, or a negative value on error.
+ */
+int
+rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
+	size_t len, size_t pg_sz, rte_mempool_memchunk_free_cb_t *free_cb,
+	void *opaque)
+{
+	phys_addr_t paddr;
+	size_t off, phys_len;
+	int ret, cnt = 0;
+
+	/* mempool must not be populated */
+	if (mp->nb_mem_chunks != 0)
+		return -EEXIST;
+	/* address and len must be page-aligned */
+	if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr)
+		return -EINVAL;
+	if (RTE_ALIGN_CEIL(len, pg_sz) != len)
+		return -EINVAL;
+
+	if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+		return rte_mempool_populate_phys(mp, addr, RTE_BAD_PHYS_ADDR,
+			len, free_cb, opaque);
+
+	for (off = 0; off + pg_sz <= len &&
+		     mp->populated_size < mp->size; off += phys_len) {
+
+		paddr = rte_mem_virt2phy(addr + off);
+		/* required for xen_dom0 to get the machine address */
+		paddr = rte_mem_phy2mch(-1, paddr);
+
+		if (paddr == RTE_BAD_PHYS_ADDR) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		/* populate with the largest group of contiguous pages */
+		for (phys_len = pg_sz; off + phys_len < len; phys_len += pg_sz) {
+			phys_addr_t paddr_tmp;
+
+			paddr_tmp = rte_mem_virt2phy(addr + off + phys_len);
+			paddr_tmp = rte_mem_phy2mch(-1, paddr_tmp);
+
+			if (paddr_tmp != paddr + phys_len)
+				break;
+		}
+
+		ret = rte_mempool_populate_phys(mp, addr + off, paddr,
+			phys_len, free_cb, opaque);
+		if (ret < 0)
+			goto fail;
+		/* no need to call the free callback for next chunks */
+		free_cb = NULL;
+		cnt += ret;
+	}
+
+	return cnt;
+
+ fail:
+	rte_mempool_free_memchunks(mp);
+	return ret;
+}
+
+/* Default function to populate the mempool: allocate memory in memzones,
+ * and populate them. Return the number of objects added, or a negative
+ * value on error.
+ */
+int
+rte_mempool_populate_default(struct rte_mempool *mp)
+{
+	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+	size_t size, total_elt_sz, align, pg_sz, pg_shift;
+	phys_addr_t paddr;
+	unsigned mz_id, n;
+	int ret;
+
+	/* mempool must not be populated */
+	if (mp->nb_mem_chunks != 0)
+		return -EEXIST;
+
+	if (rte_eal_has_hugepages()) {
+		pg_shift = 0; /* not needed, zone is physically contiguous */
+		pg_sz = 0;
+		align = RTE_CACHE_LINE_SIZE;
+	} else {
+		pg_sz = getpagesize();
+		pg_shift = rte_bsf32(pg_sz);
+		align = pg_sz;
+	}
+
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+	for (mz_id = 0, n = mp->size; n > 0; mz_id++, n -= ret) {
+		size = rte_mempool_xmem_size(n, total_elt_sz, pg_shift);
+
+		ret = snprintf(mz_name, sizeof(mz_name),
+			RTE_MEMPOOL_MZ_FORMAT "_%d", mp->name, mz_id);
+		if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+			ret = -ENAMETOOLONG;
+			goto fail;
+		}
+
+		mz = rte_memzone_reserve_aligned(mz_name, size,
+			mp->socket_id, mz_flags, align);
+		/* not enough memory, retry with the biggest zone we have */
+		if (mz == NULL)
+			mz = rte_memzone_reserve_aligned(mz_name, 0,
+				mp->socket_id, mz_flags, align);
+		if (mz == NULL) {
+			ret = -rte_errno;
+			goto fail;
+		}
+
+		if (mp->flags & MEMPOOL_F_NO_PHYS_CONTIG)
+			paddr = RTE_BAD_PHYS_ADDR;
+		else
+			paddr = mz->phys_addr;
+
+		if (rte_eal_has_hugepages() && !rte_xen_dom0_supported())
+			ret = rte_mempool_populate_phys(mp, mz->addr,
+				paddr, mz->len,
+				rte_mempool_memchunk_mz_free,
+				(void *)(uintptr_t)mz);
+		else
+			ret = rte_mempool_populate_virt(mp, mz->addr,
+				mz->len, pg_sz,
+				rte_mempool_memchunk_mz_free,
+				(void *)(uintptr_t)mz);
+		if (ret < 0)
+			goto fail;
+	}
+
+	return mp->size;
+
+ fail:
+	rte_mempool_free_memchunks(mp);
+	return ret;
+}
+
+/* return the memory size required for mempool objects in anonymous mem */
+static size_t
+get_anon_size(const struct rte_mempool *mp)
+{
+	size_t size, total_elt_sz, pg_sz, pg_shift;
+
+	pg_sz = getpagesize();
+	pg_shift = rte_bsf32(pg_sz);
+	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+	size = rte_mempool_xmem_size(mp->size, total_elt_sz, pg_shift);
+
+	return size;
+}
+
+/* unmap a memory zone mapped by rte_mempool_populate_anon() */
+static void
+rte_mempool_memchunk_anon_free(struct rte_mempool_memhdr *memhdr,
+	void *opaque)
+{
+	munmap(opaque, get_anon_size(memhdr->mp));
+}
+
+/* populate the mempool with an anonymous mapping */
+int
+rte_mempool_populate_anon(struct rte_mempool *mp)
+{
+	size_t size;
+	int ret;
+	char *addr;
+
+	/* mempool is already populated, error */
+	if (!STAILQ_EMPTY(&mp->mem_list)) {
+		rte_errno = EINVAL;
+		return 0;
+	}
+
+	/* get chunk of virtually continuous memory */
+	size = get_anon_size(mp);
+	addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (addr == MAP_FAILED) {
+		rte_errno = errno;
+		return 0;
+	}
+	/* can't use MMAP_LOCKED, it does not exist on BSD */
+	if (mlock(addr, size) < 0) {
+		rte_errno = errno;
+		munmap(addr, size);
+		return 0;
+	}
+
+	ret = rte_mempool_populate_virt(mp, addr, size, getpagesize(),
+		rte_mempool_memchunk_anon_free, addr);
+	if (ret == 0)
+		goto fail;
+
+	return mp->populated_size;
+
+ fail:
+	rte_mempool_free_memchunks(mp);
+	return 0;
+}
+
+/* free a mempool */
+void
+rte_mempool_free(struct rte_mempool *mp)
+{
+	struct rte_mempool_list *mempool_list = NULL;
+	struct rte_tailq_entry *te;
+
+	if (mp == NULL)
+		return;
+
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+	/* find out tailq entry */
+	TAILQ_FOREACH(te, mempool_list, next) {
+		if (te->data == (void *)mp)
+			break;
+	}
+
+	if (te != NULL) {
+		TAILQ_REMOVE(mempool_list, te, next);
+		rte_free(te);
+	}
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	rte_mempool_free_memchunks(mp);
+	rte_mempool_ops_free(mp);
+	rte_memzone_free(mp->mz);
+}
+
+static void
+mempool_cache_init(struct rte_mempool_cache *cache, uint32_t size)
+{
+	cache->size = size;
+	cache->flushthresh = CALC_CACHE_FLUSHTHRESH(size);
+	cache->len = 0;
 }
 
 /*
- * Create the mempool over already allocated chunk of memory.
- * That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to original behaviour
- * and allocate space for mempool and it's elements as one big chunk of
- * physically continuos memory.
- * */
+ * Create and initialize a cache for objects that are retrieved from and
+ * returned to an underlying mempool. This structure is identical to the
+ * local_cache[lcore_id] pointed to by the mempool structure.
+ */
+struct rte_mempool_cache *
+rte_mempool_cache_create(uint32_t size, int socket_id)
+{
+	struct rte_mempool_cache *cache;
+
+	if (size == 0 || size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	cache = rte_zmalloc_socket("MEMPOOL_CACHE", sizeof(*cache),
+				  RTE_CACHE_LINE_SIZE, socket_id);
+	if (cache == NULL) {
+		RTE_LOG(ERR, MEMPOOL, "Cannot allocate mempool cache.\n");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	mempool_cache_init(cache, size);
+
+	return cache;
+}
+
+/*
+ * Free a cache. It's the responsibility of the user to make sure that any
+ * remaining objects in the cache are flushed to the corresponding
+ * mempool.
+ */
+void
+rte_mempool_cache_free(struct rte_mempool_cache *cache)
+{
+	rte_free(cache);
+}
+
+/* create an empty mempool */
 struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
-		unsigned cache_size, unsigned private_data_size,
-		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
-		int socket_id, unsigned flags, void *vaddr,
-		const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
+	unsigned cache_size, unsigned private_data_size,
+	int socket_id, unsigned flags)
 {
 	char mz_name[RTE_MEMZONE_NAMESIZE];
-	char rg_name[RTE_RING_NAMESIZE];
 	struct rte_mempool_list *mempool_list;
 	struct rte_mempool *mp = NULL;
 	struct rte_tailq_entry *te = NULL;
-	struct rte_ring *r = NULL;
-	const struct rte_memzone *mz;
+	const struct rte_memzone *mz = NULL;
 	size_t mempool_size;
 	int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
-	int rg_flags = 0;
-	void *obj;
 	struct rte_mempool_objsz objsz;
-	void *startaddr;
-	int page_size = getpagesize();
+	unsigned lcore_id;
+	int ret;
 
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -474,28 +759,10 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		return NULL;
 	}
 
-	/* check that we have both VA and PA */
-	if (vaddr != NULL && paddr == NULL) {
-		rte_errno = EINVAL;
-		return NULL;
-	}
-
-	/* Check that pg_num and pg_shift parameters are valid. */
-	if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
-		rte_errno = EINVAL;
-		return NULL;
-	}
-
 	/* "no cache align" imply "no spread" */
 	if (flags & MEMPOOL_F_NO_CACHE_ALIGN)
 		flags |= MEMPOOL_F_NO_SPREAD;
 
-	/* ring flags */
-	if (flags & MEMPOOL_F_SP_PUT)
-		rg_flags |= RING_F_SP_ENQ;
-	if (flags & MEMPOOL_F_SC_GET)
-		rg_flags |= RING_F_SC_DEQ;
-
 	/* calculate mempool object sizes. */
 	if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) {
 		rte_errno = EINVAL;
@@ -504,15 +771,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 
 	rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
 
-	/* allocate the ring that will be used to store objects */
-	/* Ring functions will return appropriate errors if we are
-	 * running as a secondary process etc., so no checks made
-	 * in this function for that condition */
-	snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name);
-	r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
-	if (r == NULL)
-		goto exit_unlock;
-
 	/*
 	 * reserve a memory zone for this mempool: private data is
 	 * cache-aligned
@@ -520,17 +778,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	private_data_size = (private_data_size +
 			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
 
-	if (! rte_eal_has_hugepages()) {
-		/*
-		 * expand private data size to a whole page, so that the
-		 * first pool element will start on a new standard page
-		 */
-		int head = sizeof(struct rte_mempool);
-		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
-			private_data_size += page_size - new_size;
-		}
-	}
 
 	/* try to allocate tailq entry */
 	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
@@ -539,89 +786,57 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		goto exit_unlock;
 	}
 
-	/*
-	 * If user provided an external memory buffer, then use it to
-	 * store mempool objects. Otherwise reserve a memzone that is large
-	 * enough to hold mempool header and metadata plus mempool objects.
-	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
-	if (vaddr == NULL)
-		mempool_size += (size_t)objsz.total_size * n;
 
-	if (! rte_eal_has_hugepages()) {
-		/*
-		 * we want the memory pool to start on a page boundary,
-		 * because pool elements crossing page boundaries would
-		 * result in discontiguous physical addresses
-		 */
-		mempool_size += page_size;
+	ret = snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
+	if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+		rte_errno = ENAMETOOLONG;
+		goto exit_unlock;
 	}
 
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
-
 	mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
 	if (mz == NULL)
 		goto exit_unlock;
 
-	if (rte_eal_has_hugepages()) {
-		startaddr = (void*)mz->addr;
-	} else {
-		/* align memory pool start address on a page boundary */
-		unsigned long addr = (unsigned long)mz->addr;
-		if (addr & (page_size - 1)) {
-			addr += page_size;
-			addr &= ~(page_size - 1);
-		}
-		startaddr = (void*)addr;
-	}
-
 	/* init the mempool structure */
-	mp = startaddr;
-	memset(mp, 0, sizeof(*mp));
-	snprintf(mp->name, sizeof(mp->name), "%s", name);
-	mp->phys_addr = mz->phys_addr;
-	mp->ring = r;
+	mp = mz->addr;
+	memset(mp, 0, MEMPOOL_HEADER_SIZE(mp, cache_size));
+	ret = snprintf(mp->name, sizeof(mp->name), "%s", name);
+	if (ret < 0 || ret >= (int)sizeof(mp->name)) {
+		rte_errno = ENAMETOOLONG;
+		goto exit_unlock;
+	}
+	mp->mz = mz;
+	mp->socket_id = socket_id;
 	mp->size = n;
 	mp->flags = flags;
+	mp->socket_id = socket_id;
 	mp->elt_size = objsz.elt_size;
 	mp->header_size = objsz.header_size;
 	mp->trailer_size = objsz.trailer_size;
+	/* Size of default caches, zero means disabled. */
 	mp->cache_size = cache_size;
-	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
+	STAILQ_INIT(&mp->elt_list);
+	STAILQ_INIT(&mp->mem_list);
 
-	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
-		private_data_size;
-	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
-
-	/* populate address translation fields. */
-	mp->pg_num = pg_num;
-	mp->pg_shift = pg_shift;
-	mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));
-
-	/* mempool elements allocated together with mempool */
-	if (vaddr == NULL) {
-		mp->elt_va_start = (uintptr_t)obj;
-		mp->elt_pa[0] = mp->phys_addr +
-			(mp->elt_va_start - (uintptr_t)mp);
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, 0));
 
-	/* mempool elements in a separate chunk of memory. */
-	} else {
-		mp->elt_va_start = (uintptr_t)vaddr;
-		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
+	/* Init all default caches. */
+	if (cache_size != 0) {
+		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+			mempool_cache_init(&mp->local_cache[lcore_id],
+					   cache_size);
 	}
 
-	mp->elt_va_end = mp->elt_va_start;
-
-	/* call the initializer */
-	if (mp_init)
-		mp_init(mp, mp_init_arg);
-
-	mempool_populate(mp, n, 1, obj_init, obj_init_arg);
-
-	te->data = (void *) mp;
+	te->data = mp;
 
 	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
 	TAILQ_INSERT_TAIL(mempool_list, te, next);
@@ -632,30 +847,132 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 
 exit_unlock:
 	rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
-	rte_ring_free(r);
 	rte_free(te);
+	rte_mempool_free(mp);
+	return NULL;
+}
+
+/* create the mempool */
+struct rte_mempool *
+rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
+	unsigned cache_size, unsigned private_data_size,
+	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+	rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
+	int socket_id, unsigned flags)
+{
+	struct rte_mempool *mp;
+
+	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
+		private_data_size, socket_id, flags);
+	if (mp == NULL)
+		return NULL;
+
+	/*
+	 * Since we have 4 combinations of the SP/SC/MP/MC examine the flags to
+	 * set the correct index into the table of ops structs.
+	 */
+	if (flags & (MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET))
+		rte_mempool_set_ops_byname(mp, "ring_sp_sc", NULL);
+	else if (flags & MEMPOOL_F_SP_PUT)
+		rte_mempool_set_ops_byname(mp, "ring_sp_mc", NULL);
+	else if (flags & MEMPOOL_F_SC_GET)
+		rte_mempool_set_ops_byname(mp, "ring_mp_sc", NULL);
+	else
+		rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL);
+
+	/* call the mempool priv initializer */
+	if (mp_init)
+		mp_init(mp, mp_init_arg);
+
+	if (rte_mempool_populate_default(mp) < 0)
+		goto fail;
+
+	/* call the object initializers */
+	if (obj_init)
+		rte_mempool_obj_iter(mp, obj_init, obj_init_arg);
+
+	return mp;
 
+ fail:
+	rte_mempool_free(mp);
+	return NULL;
+}
+
+/*
+ * Create the mempool over already allocated chunk of memory.
+ * That external memory buffer can consists of physically disjoint pages.
+ * Setting vaddr to NULL, makes mempool to fallback to original behaviour
+ * and allocate space for mempool and it's elements as one big chunk of
+ * physically continuos memory.
+ */
+struct rte_mempool *
+rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
+		unsigned cache_size, unsigned private_data_size,
+		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+		rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
+		int socket_id, unsigned flags, void *vaddr,
+		const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+{
+	struct rte_mempool *mp = NULL;
+	int ret;
+
+	/* no virtual address supplied, use rte_mempool_create() */
+	if (vaddr == NULL)
+		return rte_mempool_create(name, n, elt_size, cache_size,
+			private_data_size, mp_init, mp_init_arg,
+			obj_init, obj_init_arg, socket_id, flags);
+
+	/* check that we have both VA and PA */
+	if (paddr == NULL) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	/* Check that pg_shift parameter is valid. */
+	if (pg_shift > MEMPOOL_PG_SHIFT_MAX) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
+		private_data_size, socket_id, flags);
+	if (mp == NULL)
+		return NULL;
+
+	/* call the mempool priv initializer */
+	if (mp_init)
+		mp_init(mp, mp_init_arg);
+
+	ret = rte_mempool_populate_phys_tab(mp, vaddr, paddr, pg_num, pg_shift,
+		NULL, NULL);
+	if (ret < 0 || ret != (int)mp->size)
+		goto fail;
+
+	/* call the object initializers */
+	if (obj_init)
+		rte_mempool_obj_iter(mp, obj_init, obj_init_arg);
+
+	return mp;
+
+ fail:
+	rte_mempool_free(mp);
 	return NULL;
 }
 
 /* Return the number of entries in the mempool */
-unsigned
-rte_mempool_count(const struct rte_mempool *mp)
+unsigned int
+rte_mempool_avail_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
-	count = rte_ring_count(mp->ring);
+	count = rte_mempool_ops_get_count(mp);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -666,116 +983,168 @@ rte_mempool_count(const struct rte_mempool *mp)
 	return count;
 }
 
+/* return the number of entries allocated from the mempool */
+unsigned int
+rte_mempool_in_use_count(const struct rte_mempool *mp)
+{
+	return mp->size - rte_mempool_avail_count(mp);
+}
+
+unsigned int
+rte_mempool_count(const struct rte_mempool *mp)
+{
+	return rte_mempool_avail_count(mp);
+}
+
 /* dump the cache status */
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
-	fprintf(f, "  cache infos:\n");
+	fprintf(f, "  internal cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
-		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		fprintf(f, "    cache_count[%u]=%"PRIu32"\n",
+			lcore_id, cache_count);
 		count += cache_count;
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-/* check cookies before and after objects */
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
 #endif
 
-struct mempool_audit_arg {
-	const struct rte_mempool *mp;
-	uintptr_t obj_end;
-	uint32_t obj_num;
-};
-
-static void
-mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx)
+/* check and update cookies or panic (internal) */
+void rte_mempool_check_cookies(const struct rte_mempool *mp,
+	void * const *obj_table_const, unsigned n, int free)
 {
-	struct mempool_audit_arg *pa = arg;
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	struct rte_mempool_objhdr *hdr;
+	struct rte_mempool_objtlr *tlr;
+	uint64_t cookie;
+	void *tmp;
 	void *obj;
-
-	obj = (char *)start + pa->mp->header_size;
-	pa->obj_end = (uintptr_t)end;
-	pa->obj_num = idx + 1;
-	__mempool_check_cookies(pa->mp, &obj, 1, 2);
+	void **obj_table;
+
+	/* Force to drop the "const" attribute. This is done only when
+	 * DEBUG is enabled */
+	tmp = (void *) obj_table_const;
+	obj_table = (void **) tmp;
+
+	while (n--) {
+		obj = obj_table[n];
+
+		if (rte_mempool_from_obj(obj) != mp)
+			rte_panic("MEMPOOL: object is owned by another "
+				  "mempool\n");
+
+		hdr = __mempool_get_header(obj);
+		cookie = hdr->cookie;
+
+		if (free == 0) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) {
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (put)\n");
+			}
+			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
+		} else if (free == 1) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (get)\n");
+			}
+			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1;
+		} else if (free == 2) {
+			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 &&
+			    cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
+				RTE_LOG(CRIT, MEMPOOL,
+					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+					obj, (const void *) mp, cookie);
+				rte_panic("MEMPOOL: bad header cookie (audit)\n");
+			}
+		}
+		tlr = __mempool_get_trailer(obj);
+		cookie = tlr->cookie;
+		if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) {
+			RTE_LOG(CRIT, MEMPOOL,
+				"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
+				obj, (const void *) mp, cookie);
+			rte_panic("MEMPOOL: bad trailer cookie\n");
+		}
+	}
+#else
+	RTE_SET_USED(mp);
+	RTE_SET_USED(obj_table_const);
+	RTE_SET_USED(n);
+	RTE_SET_USED(free);
+#endif
 }
 
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 static void
-mempool_audit_cookies(const struct rte_mempool *mp)
+mempool_obj_audit(struct rte_mempool *mp, __rte_unused void *opaque,
+	void *obj, __rte_unused unsigned idx)
 {
-	uint32_t elt_sz, num;
-	struct mempool_audit_arg arg;
-
-	elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
-
-	arg.mp = mp;
-	arg.obj_end = mp->elt_va_start;
-	arg.obj_num = 0;
+	__mempool_check_cookies(mp, &obj, 1, 2);
+}
 
-	num = rte_mempool_obj_iter((void *)mp->elt_va_start,
-		mp->size, elt_sz, 1,
-		mp->elt_pa, mp->pg_num, mp->pg_shift,
-		mempool_obj_audit, &arg);
+static void
+mempool_audit_cookies(struct rte_mempool *mp)
+{
+	unsigned num;
 
+	num = rte_mempool_obj_iter(mp, mempool_obj_audit, NULL);
 	if (num != mp->size) {
-			rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
+		rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
 			"iterated only over %u elements\n",
 			mp, mp->size, num);
-	} else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) {
-			rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
-			"last callback va_end: %#tx (%#tx expeceted), "
-			"num of objects: %u (%u expected)\n",
-			mp, mp->size,
-			arg.obj_end, mp->elt_va_end,
-			arg.obj_num, mp->size);
 	}
 }
+#else
+#define mempool_audit_cookies(mp) do {} while(0)
+#endif
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic error "-Wcast-qual"
 #endif
-#else
-#define mempool_audit_cookies(mp) do {} while(0)
-#endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+		const struct rte_mempool_cache *cache;
+		cache = &mp->local_cache[lcore_id];
+		if (cache->len > cache->flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
 				lcore_id);
 			rte_panic("MEMPOOL: invalid cache len\n");
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
-rte_mempool_audit(const struct rte_mempool *mp)
+rte_mempool_audit(struct rte_mempool *mp)
 {
 	mempool_audit_cache(mp);
 	mempool_audit_cookies(mp);
@@ -786,23 +1155,27 @@ rte_mempool_audit(const struct rte_mempool *mp)
 
 /* dump the status of the mempool on the console */
 void
-rte_mempool_dump(FILE *f, const struct rte_mempool *mp)
+rte_mempool_dump(FILE *f, struct rte_mempool *mp)
 {
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	struct rte_mempool_debug_stats sum;
 	unsigned lcore_id;
 #endif
+	struct rte_mempool_memhdr *memhdr;
 	unsigned common_count;
 	unsigned cache_count;
+	size_t mem_len = 0;
 
-	RTE_VERIFY(f != NULL);
-	RTE_VERIFY(mp != NULL);
+	RTE_ASSERT(f != NULL);
+	RTE_ASSERT(mp != NULL);
 
 	fprintf(f, "mempool <%s>@%p\n", mp->name, mp);
 	fprintf(f, "  flags=%x\n", mp->flags);
-	fprintf(f, "  ring=<%s>@%p\n", mp->ring->name, mp->ring);
-	fprintf(f, "  phys_addr=0x%" PRIx64 "\n", mp->phys_addr);
+	fprintf(f, "  pool=%p\n", mp->pool_data);
+	fprintf(f, "  phys_addr=0x%" PRIx64 "\n", mp->mz->phys_addr);
+	fprintf(f, "  nb_mem_chunks=%u\n", mp->nb_mem_chunks);
 	fprintf(f, "  size=%"PRIu32"\n", mp->size);
+	fprintf(f, "  populated_size=%"PRIu32"\n", mp->populated_size);
 	fprintf(f, "  header_size=%"PRIu32"\n", mp->header_size);
 	fprintf(f, "  elt_size=%"PRIu32"\n", mp->elt_size);
 	fprintf(f, "  trailer_size=%"PRIu32"\n", mp->trailer_size);
@@ -810,20 +1183,16 @@ rte_mempool_dump(FILE *f, const struct rte_mempool *mp)
 	       mp->header_size + mp->elt_size + mp->trailer_size);
 
 	fprintf(f, "  private_data_size=%"PRIu32"\n", mp->private_data_size);
-	fprintf(f, "  pg_num=%"PRIu32"\n", mp->pg_num);
-	fprintf(f, "  pg_shift=%"PRIu32"\n", mp->pg_shift);
-	fprintf(f, "  pg_mask=%#tx\n", mp->pg_mask);
-	fprintf(f, "  elt_va_start=%#tx\n", mp->elt_va_start);
-	fprintf(f, "  elt_va_end=%#tx\n", mp->elt_va_end);
-	fprintf(f, "  elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]);
-
-	if (mp->size != 0)
+
+	STAILQ_FOREACH(memhdr, &mp->mem_list, next)
+		mem_len += memhdr->len;
+	if (mem_len != 0) {
 		fprintf(f, "  avg bytes/object=%#Lf\n",
-			(long double)(mp->elt_va_end - mp->elt_va_start) /
-			mp->size);
+			(long double)mem_len / mp->size);
+	}
 
 	cache_count = rte_mempool_dump_cache(f, mp);
-	common_count = rte_ring_count(mp->ring);
+	common_count = rte_mempool_ops_get_count(mp);
 	if ((cache_count + common_count) > mp->size)
 		common_count = mp->size - cache_count;
 	fprintf(f, "  common_pool_count=%u\n", common_count);
@@ -857,7 +1226,7 @@ rte_mempool_dump(FILE *f, const struct rte_mempool *mp)
 void
 rte_mempool_list_dump(FILE *f)
 {
-	const struct rte_mempool *mp = NULL;
+	struct rte_mempool *mp = NULL;
 	struct rte_tailq_entry *te;
 	struct rte_mempool_list *mempool_list;
 
@@ -901,7 +1270,7 @@ rte_mempool_lookup(const char *name)
 	return mp;
 }
 
-void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *),
+void rte_mempool_walk(void (*func)(struct rte_mempool *, void *),
 		      void *arg)
 {
 	struct rte_tailq_entry *te = NULL;
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0d..fb7052e1 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -66,12 +67,14 @@
 #include <inttypes.h>
 #include <sys/queue.h>
 
+#include <rte_spinlock.h>
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_lcore.h>
 #include <rte_memory.h>
 #include <rte_branch_prediction.h>
 #include <rte_ring.h>
+#include <rte_memcpy.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -95,19 +98,19 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
 struct rte_mempool_cache {
-	unsigned len; /**< Cache len */
+	uint32_t size;	      /**< Size of the cache */
+	uint32_t flushthresh; /**< Threshold before we flush excess elements */
+	uint32_t len;	      /**< Current cache count */
 	/*
 	 * Cache is allocated to this size to allow it to overflow in certain
 	 * cases to avoid needless emptying of cache.
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -126,17 +129,6 @@ struct rte_mempool_objsz {
 /* "MP_<name>" */
 #define	RTE_MEMPOOL_MZ_FORMAT	RTE_MEMPOOL_MZ_PREFIX "%s"
 
-#ifdef RTE_LIBRTE_XEN_DOM0
-
-/* "<name>_MP_elt" */
-#define	RTE_MEMPOOL_OBJ_NAME	"%s_" RTE_MEMPOOL_MZ_PREFIX "elt"
-
-#else
-
-#define	RTE_MEMPOOL_OBJ_NAME	RTE_MEMPOOL_MZ_FORMAT
-
-#endif /* RTE_LIBRTE_XEN_DOM0 */
-
 #define	MEMPOOL_PG_SHIFT_MAX	(sizeof(uintptr_t) * CHAR_BIT - 1)
 
 /** Mempool over one chunk of physically continuous memory */
@@ -152,18 +144,26 @@ struct rte_mempool_objsz {
  * Mempool object header structure
  *
  * Each object stored in mempools are prefixed by this header structure,
- * it allows to retrieve the mempool pointer from the object. When debug
- * is enabled, a cookie is also added in this structure preventing
- * corruptions and double-frees.
+ * it allows to retrieve the mempool pointer from the object and to
+ * iterate on all objects attached to a mempool. When debug is enabled,
+ * a cookie is also added in this structure preventing corruptions and
+ * double-frees.
  */
 struct rte_mempool_objhdr {
+	STAILQ_ENTRY(rte_mempool_objhdr) next; /**< Next in list. */
 	struct rte_mempool *mp;          /**< The mempool owning the object. */
+	phys_addr_t physaddr;            /**< Physical address of the object. */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	uint64_t cookie;                 /**< Debug cookie. */
 #endif
 };
 
 /**
+ * A list of object headers type
+ */
+STAILQ_HEAD(rte_mempool_objhdr_list, rte_mempool_objhdr);
+
+/**
  * Mempool object trailer structure
  *
  * In debug mode, each object stored in mempools are suffixed by this
@@ -176,53 +176,82 @@ struct rte_mempool_objtlr {
 };
 
 /**
+ * A list of memory where objects are stored
+ */
+STAILQ_HEAD(rte_mempool_memhdr_list, rte_mempool_memhdr);
+
+/**
+ * Callback used to free a memory chunk
+ */
+typedef void (rte_mempool_memchunk_free_cb_t)(struct rte_mempool_memhdr *memhdr,
+	void *opaque);
+
+/**
+ * Mempool objects memory header structure
+ *
+ * The memory chunks where objects are stored. Each chunk is virtually
+ * and physically contiguous.
+ */
+struct rte_mempool_memhdr {
+	STAILQ_ENTRY(rte_mempool_memhdr) next; /**< Next in list. */
+	struct rte_mempool *mp;  /**< The mempool owning the chunk */
+	void *addr;              /**< Virtual address of the chunk */
+	phys_addr_t phys_addr;   /**< Physical address of the chunk */
+	size_t len;              /**< length of the chunk */
+	rte_mempool_memchunk_free_cb_t *free_cb; /**< Free callback */
+	void *opaque;            /**< Argument passed to the free callback */
+};
+
+/**
  * The RTE mempool structure.
  */
 struct rte_mempool {
 	char name[RTE_MEMPOOL_NAMESIZE]; /**< Name of mempool. */
-	struct rte_ring *ring;           /**< Ring to store objects. */
-	phys_addr_t phys_addr;           /**< Phys. addr. of mempool struct. */
+	union {
+		void *pool_data;         /**< Ring or pool to store objects. */
+		uint64_t pool_id;        /**< External mempool identifier. */
+	};
+	void *pool_config;               /**< optional args for ops alloc. */
+	const struct rte_memzone *mz;    /**< Memzone where pool is alloc'd. */
 	int flags;                       /**< Flags of the mempool. */
-	uint32_t size;                   /**< Size of the mempool. */
-	uint32_t cache_size;             /**< Size of per-lcore local cache. */
-	uint32_t cache_flushthresh;
-	/**< Threshold before we flush excess elements. */
+	int socket_id;                   /**< Socket id passed at create. */
+	uint32_t size;                   /**< Max size of the mempool. */
+	uint32_t cache_size;
+	/**< Size of per-lcore default local cache. */
 
 	uint32_t elt_size;               /**< Size of an element. */
 	uint32_t header_size;            /**< Size of header (before elt). */
 	uint32_t trailer_size;           /**< Size of trailer (after elt). */
 
 	unsigned private_data_size;      /**< Size of private data. */
+	/**
+	 * Index into rte_mempool_ops_table array of mempool ops
+	 * structs, which contain callback function pointers.
+	 * We're using an index here rather than pointers to the callbacks
+	 * to facilitate any secondary processes that may want to use
+	 * this mempool.
+	 */
+	int32_t ops_index;
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
+
+	uint32_t populated_size;         /**< Number of populated objects. */
+	struct rte_mempool_objhdr_list elt_list; /**< List of objects in pool */
+	uint32_t nb_mem_chunks;          /**< Number of memory chunks */
+	struct rte_mempool_memhdr_list mem_list; /**< List of memory chunks */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
 	struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
 #endif
-
-	/* Address translation support, starts from next cache line. */
-
-	/** Number of elements in the elt_pa array. */
-	uint32_t    pg_num __rte_cache_aligned;
-	uint32_t    pg_shift;     /**< LOG2 of the physical pages. */
-	uintptr_t   pg_mask;      /**< physical page mask value. */
-	uintptr_t   elt_va_start;
-	/**< Virtual address of the first mempool object. */
-	uintptr_t   elt_va_end;
-	/**< Virtual address of the <size + 1> mempool object. */
-	phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT];
-	/**< Array of physical page addresses for the mempool objects buffer. */
-
 }  __rte_cache_aligned;
 
-#define MEMPOOL_F_NO_SPREAD      0x0001 /**< Do not spread in memory. */
+#define MEMPOOL_F_NO_SPREAD      0x0001 /**< Do not spread among memory channels. */
 #define MEMPOOL_F_NO_CACHE_ALIGN 0x0002 /**< Do not align objs on cache lines.*/
 #define MEMPOOL_F_SP_PUT         0x0004 /**< Default put is "single-producer".*/
 #define MEMPOOL_F_SC_GET         0x0008 /**< Default get is "single-consumer".*/
+#define MEMPOOL_F_POOL_CREATED   0x0010 /**< Internal: pool is created. */
+#define MEMPOOL_F_NO_PHYS_CONTIG 0x0020 /**< Don't need physically contiguous objs. */
 
 /**
  * @internal When debug is enabled, store some statistics.
@@ -251,24 +280,18 @@ struct rte_mempool {
  *
  * @param mp
  *   Pointer to the memory pool.
- * @param pgn
- *   Number of pages used to store mempool objects.
- */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
-
-/**
- * Return true if the whole mempool is in contiguous memory.
+ * @param cs
+ *   Size of the per-lcore cache.
  */
-#define	MEMPOOL_IS_CONTIG(mp)                      \
-	((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \
-	(mp)->phys_addr == (mp)->elt_pa[0])
+#define MEMPOOL_HEADER_SIZE(mp, cs) \
+	(sizeof(*(mp)) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /* return the header of a mempool object (internal) */
 static inline struct rte_mempool_objhdr *__mempool_get_header(void *obj)
 {
-	return (struct rte_mempool_objhdr *)RTE_PTR_SUB(obj, sizeof(struct rte_mempool_objhdr));
+	return (struct rte_mempool_objhdr *)RTE_PTR_SUB(obj,
+		sizeof(struct rte_mempool_objhdr));
 }
 
 /**
@@ -307,141 +330,242 @@ static inline struct rte_mempool_objtlr *__mempool_get_trailer(void *obj)
  *   - 1: object is supposed to be free, mark it as allocated
  *   - 2: just check that cookie is valid (free or allocated)
  */
+void rte_mempool_check_cookies(const struct rte_mempool *mp,
+	void * const *obj_table_const, unsigned n, int free);
+
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-#ifndef __INTEL_COMPILER
-#pragma GCC diagnostic ignored "-Wcast-qual"
-#endif
-static inline void __mempool_check_cookies(const struct rte_mempool *mp,
-					   void * const *obj_table_const,
-					   unsigned n, int free)
-{
-	struct rte_mempool_objhdr *hdr;
-	struct rte_mempool_objtlr *tlr;
-	uint64_t cookie;
-	void *tmp;
-	void *obj;
-	void **obj_table;
-
-	/* Force to drop the "const" attribute. This is done only when
-	 * DEBUG is enabled */
-	tmp = (void *) obj_table_const;
-	obj_table = (void **) tmp;
-
-	while (n--) {
-		obj = obj_table[n];
-
-		if (rte_mempool_from_obj(obj) != mp)
-			rte_panic("MEMPOOL: object is owned by another "
-				  "mempool\n");
-
-		hdr = __mempool_get_header(obj);
-		cookie = hdr->cookie;
-
-		if (free == 0) {
-			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) {
-				rte_log_set_history(0);
-				RTE_LOG(CRIT, MEMPOOL,
-					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
-					obj, (const void *) mp, cookie);
-				rte_panic("MEMPOOL: bad header cookie (put)\n");
-			}
-			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2;
-		}
-		else if (free == 1) {
-			if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
-				rte_log_set_history(0);
-				RTE_LOG(CRIT, MEMPOOL,
-					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
-					obj, (const void *) mp, cookie);
-				rte_panic("MEMPOOL: bad header cookie (get)\n");
-			}
-			hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1;
-		}
-		else if (free == 2) {
-			if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 &&
-			    cookie != RTE_MEMPOOL_HEADER_COOKIE2) {
-				rte_log_set_history(0);
-				RTE_LOG(CRIT, MEMPOOL,
-					"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
-					obj, (const void *) mp, cookie);
-				rte_panic("MEMPOOL: bad header cookie (audit)\n");
-			}
-		}
-		tlr = __mempool_get_trailer(obj);
-		cookie = tlr->cookie;
-		if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) {
-			rte_log_set_history(0);
-			RTE_LOG(CRIT, MEMPOOL,
-				"obj=%p, mempool=%p, cookie=%" PRIx64 "\n",
-				obj, (const void *) mp, cookie);
-			rte_panic("MEMPOOL: bad trailer cookie\n");
-		}
-	}
-}
-#ifndef __INTEL_COMPILER
-#pragma GCC diagnostic error "-Wcast-qual"
-#endif
+#define __mempool_check_cookies(mp, obj_table_const, n, free) \
+	rte_mempool_check_cookies(mp, obj_table_const, n, free)
 #else
 #define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0)
 #endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
 
+#define RTE_MEMPOOL_OPS_NAMESIZE 32 /**< Max length of ops struct name. */
+
+/**
+ * Prototype for implementation specific data provisioning function.
+ *
+ * The function should provide the implementation specific memory for
+ * for use by the other mempool ops functions in a given mempool ops struct.
+ * E.g. the default ops provides an instance of the rte_ring for this purpose.
+ * it will most likely point to a different type of data structure, and
+ * will be transparent to the application programmer.
+ * This function should set mp->pool_data.
+ */
+typedef int (*rte_mempool_alloc_t)(struct rte_mempool *mp);
+
+/**
+ * Free the opaque private data pointed to by mp->pool_data pointer.
+ */
+typedef void (*rte_mempool_free_t)(struct rte_mempool *mp);
+
+/**
+ * Enqueue an object into the external pool.
+ */
+typedef int (*rte_mempool_enqueue_t)(struct rte_mempool *mp,
+		void * const *obj_table, unsigned int n);
+
 /**
- * A mempool object iterator callback function.
+ * Dequeue an object from the external pool.
  */
-typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/,
-	void * /*obj_start*/,
-	void * /*obj_end*/,
-	uint32_t /*obj_index */);
+typedef int (*rte_mempool_dequeue_t)(struct rte_mempool *mp,
+		void **obj_table, unsigned int n);
 
 /**
- * Call a function for each mempool object in a memory chunk
+ * Return the number of available objects in the external pool.
+ */
+typedef unsigned (*rte_mempool_get_count)(const struct rte_mempool *mp);
+
+/** Structure defining mempool operations structure */
+struct rte_mempool_ops {
+	char name[RTE_MEMPOOL_OPS_NAMESIZE]; /**< Name of mempool ops struct. */
+	rte_mempool_alloc_t alloc;       /**< Allocate private data. */
+	rte_mempool_free_t free;         /**< Free the external pool. */
+	rte_mempool_enqueue_t enqueue;   /**< Enqueue an object. */
+	rte_mempool_dequeue_t dequeue;   /**< Dequeue an object. */
+	rte_mempool_get_count get_count; /**< Get qty of available objs. */
+} __rte_cache_aligned;
+
+#define RTE_MEMPOOL_MAX_OPS_IDX 16  /**< Max registered ops structs */
+
+/**
+ * Structure storing the table of registered ops structs, each of which contain
+ * the function pointers for the mempool ops functions.
+ * Each process has its own storage for this ops struct array so that
+ * the mempools can be shared across primary and secondary processes.
+ * The indices used to access the array are valid across processes, whereas
+ * any function pointers stored directly in the mempool struct would not be.
+ * This results in us simply having "ops_index" in the mempool struct.
+ */
+struct rte_mempool_ops_table {
+	rte_spinlock_t sl;     /**< Spinlock for add/delete. */
+	uint32_t num_ops;      /**< Number of used ops structs in the table. */
+	/**
+	 * Storage for all possible ops structs.
+	 */
+	struct rte_mempool_ops ops[RTE_MEMPOOL_MAX_OPS_IDX];
+} __rte_cache_aligned;
+
+/** Array of registered ops structs. */
+extern struct rte_mempool_ops_table rte_mempool_ops_table;
+
+/**
+ * @internal Get the mempool ops struct from its index.
  *
- * Iterate across objects of the given size and alignment in the
- * provided chunk of memory. The given memory buffer can consist of
- * disjointed physical pages.
+ * @param ops_index
+ *   The index of the ops struct in the ops struct table. It must be a valid
+ *   index: (0 <= idx < num_ops).
+ * @return
+ *   The pointer to the ops struct in the table.
+ */
+static inline struct rte_mempool_ops *
+rte_mempool_get_ops(int ops_index)
+{
+	RTE_VERIFY((ops_index >= 0) && (ops_index < RTE_MEMPOOL_MAX_OPS_IDX));
+
+	return &rte_mempool_ops_table.ops[ops_index];
+}
+
+/**
+ * @internal Wrapper for mempool_ops alloc callback.
  *
- * For each object, call the provided callback (if any). This function
- * is used to populate a mempool, or walk through all the elements of a
- * mempool, or estimate how many elements of the given size could be
- * created in the given memory buffer.
+ * @param mp
+ *   Pointer to the memory pool.
+ * @return
+ *   - 0: Success; successfully allocated mempool pool_data.
+ *   - <0: Error; code of alloc function.
+ */
+int
+rte_mempool_ops_alloc(struct rte_mempool *mp);
+
+/**
+ * @internal Wrapper for mempool_ops dequeue callback.
  *
- * @param vaddr
- *   Virtual address of the memory buffer.
- * @param elt_num
- *   Maximum number of objects to iterate through.
- * @param elt_sz
- *   Size of each object.
- * @param align
- *   Alignment of each object.
- * @param paddr
- *   Array of physical addresses of the pages that comprises given memory
- *   buffer.
- * @param pg_num
- *   Number of elements in the paddr array.
- * @param pg_shift
- *   LOG2 of the physical pages size.
- * @param obj_iter
- *   Object iterator callback function (could be NULL).
- * @param obj_iter_arg
- *   User defined parameter for the object iterator callback function.
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param obj_table
+ *   Pointer to a table of void * pointers (objects).
+ * @param n
+ *   Number of objects to get.
+ * @return
+ *   - 0: Success; got n objects.
+ *   - <0: Error; code of dequeue function.
+ */
+static inline int
+rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
+		void **obj_table, unsigned n)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+	return ops->dequeue(mp, obj_table, n);
+}
+
+/**
+ * @internal wrapper for mempool_ops enqueue callback.
  *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param obj_table
+ *   Pointer to a table of void * pointers (objects).
+ * @param n
+ *   Number of objects to put.
  * @return
- *   Number of objects iterated through.
+ *   - 0: Success; n objects supplied.
+ *   - <0: Error; code of enqueue function.
  */
-uint32_t rte_mempool_obj_iter(void *vaddr,
-	uint32_t elt_num, size_t elt_sz, size_t align,
-	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
-	rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg);
+static inline int
+rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
+		unsigned n)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+	return ops->enqueue(mp, obj_table, n);
+}
 
 /**
- * An object constructor callback function for mempool.
+ * @internal wrapper for mempool_ops get_count callback.
  *
- * Arguments are the mempool, the opaque pointer given by the user in
- * rte_mempool_create(), the pointer to the element and the index of
- * the element in the pool.
+ * @param mp
+ *   Pointer to the memory pool.
+ * @return
+ *   The number of available objects in the external pool.
  */
-typedef void (rte_mempool_obj_ctor_t)(struct rte_mempool *, void *,
-				      void *, unsigned);
+unsigned
+rte_mempool_ops_get_count(const struct rte_mempool *mp);
+
+/**
+ * @internal wrapper for mempool_ops free callback.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ */
+void
+rte_mempool_ops_free(struct rte_mempool *mp);
+
+/**
+ * Set the ops of a mempool.
+ *
+ * This can only be done on a mempool that is not populated, i.e. just after
+ * a call to rte_mempool_create_empty().
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param name
+ *   Name of the ops structure to use for this mempool.
+ * @param pool_config
+ *   Opaque data that can be passed by the application to the ops functions.
+ * @return
+ *   - 0: Success; the mempool is now using the requested ops functions.
+ *   - -EINVAL - Invalid ops struct name provided.
+ *   - -EEXIST - mempool already has an ops struct assigned.
+ */
+int
+rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
+		void *pool_config);
+
+/**
+ * Register mempool operations.
+ *
+ * @param ops
+ *   Pointer to an ops structure to register.
+ * @return
+ *   - >=0: Success; return the index of the ops struct in the table.
+ *   - -EINVAL - some missing callbacks while registering ops struct.
+ *   - -ENOSPC - the maximum number of ops structs has been reached.
+ */
+int rte_mempool_register_ops(const struct rte_mempool_ops *ops);
+
+/**
+ * Macro to statically register the ops of a mempool handler.
+ * Note that the rte_mempool_register_ops fails silently here when
+ * more then RTE_MEMPOOL_MAX_OPS_IDX is registered.
+ */
+#define MEMPOOL_REGISTER_OPS(ops)					\
+	void mp_hdlr_init_##ops(void);					\
+	void __attribute__((constructor, used)) mp_hdlr_init_##ops(void)\
+	{								\
+		rte_mempool_register_ops(&ops);			\
+	}
+
+/**
+ * An object callback function for mempool.
+ *
+ * Used by rte_mempool_create() and rte_mempool_obj_iter().
+ */
+typedef void (rte_mempool_obj_cb_t)(struct rte_mempool *mp,
+		void *opaque, void *obj, unsigned obj_idx);
+typedef rte_mempool_obj_cb_t rte_mempool_obj_ctor_t; /* compat */
+
+/**
+ * A memory callback function for mempool.
+ *
+ * Used by rte_mempool_mem_iter().
+ */
+typedef void (rte_mempool_mem_cb_t)(struct rte_mempool *mp,
+		void *opaque, struct rte_mempool_memhdr *memhdr,
+		unsigned mem_idx);
 
 /**
  * A mempool constructor callback function.
@@ -522,6 +646,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
  *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
  *     when using rte_mempool_get() or rte_mempool_get_bulk() is
  *     "single-consumer". Otherwise, it is "multi-consumers".
+ *   - MEMPOOL_F_NO_PHYS_CONTIG: If set, allocated objects won't
+ *     necessarilly be contiguous in physical memory.
  * @return
  *   The pointer to the new allocated mempool, on success. NULL on error
  *   with rte_errno set appropriately. Possible rte_errno values include:
@@ -536,14 +662,15 @@ struct rte_mempool *
 rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 		   unsigned cache_size, unsigned private_data_size,
 		   rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-		   rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		   rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
 		   int socket_id, unsigned flags);
 
 /**
  * Create a new mempool named *name* in memory.
  *
- * This function uses ``memzone_reserve()`` to allocate memory. The
- * pool contains n elements of elt_size. Its size is set to n.
+ * The pool contains n elements of elt_size. Its size is set to n.
+ * This function uses ``memzone_reserve()`` to allocate the mempool header
+ * (and the objects if vaddr is NULL).
  * Depending on the input parameters, mempool elements can be either allocated
  * together with the mempool header, or an externally provided memory buffer
  * could be used to store mempool objects. In later case, that external
@@ -558,18 +685,7 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
  * @param elt_size
  *   The size of each element.
  * @param cache_size
- *   If cache_size is non-zero, the rte_mempool library will try to
- *   limit the accesses to the common lockless pool, by maintaining a
- *   per-lcore object cache. This argument must be lower or equal to
- *   CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
- *   cache_size to have "n modulo cache_size == 0": if this is
- *   not the case, some elements will always stay in the pool and will
- *   never be used. The access to the per-lcore table is of course
- *   faster than the multi-producer/consumer pool. The cache can be
- *   disabled if the cache_size argument is set to 0; it can be useful to
- *   avoid losing objects in cache. Note that even if not used, the
- *   memory space for cache is always reserved in a mempool structure,
- *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ *   Size of the cache. See rte_mempool_create() for details.
  * @param private_data_size
  *   The size of the private data appended after the mempool
  *   structure. This is useful for storing some private data after the
@@ -583,35 +699,17 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
  *   An opaque pointer to data that can be used in the mempool
  *   constructor function.
  * @param obj_init
- *   A function pointer that is called for each object at
- *   initialization of the pool. The user can set some meta data in
- *   objects if needed. This parameter can be NULL if not needed.
- *   The obj_init() function takes the mempool pointer, the init_arg,
- *   the object pointer and the object number as parameters.
+ *   A function called for each object at initialization of the pool.
+ *   See rte_mempool_create() for details.
  * @param obj_init_arg
- *   An opaque pointer to data that can be used as an argument for
- *   each call to the object constructor function.
+ *   An opaque pointer passed to the object constructor function.
  * @param socket_id
  *   The *socket_id* argument is the socket identifier in the case of
  *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
  *   constraint for the reserved zone.
  * @param flags
- *   The *flags* arguments is an OR of following flags:
- *   - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
- *     between channels in RAM: the pool allocator will add padding
- *     between objects depending on the hardware configuration. See
- *     Memory alignment constraints for details. If this flag is set,
- *     the allocator will just align them to a cache line.
- *   - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
- *     cache-aligned. This flag removes this constraint, and no
- *     padding will be present between objects. This flag implies
- *     MEMPOOL_F_NO_SPREAD.
- *   - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
- *     when using rte_mempool_put() or rte_mempool_put_bulk() is
- *     "single-producer". Otherwise, it is "multi-producers".
- *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
- *     when using rte_mempool_get() or rte_mempool_get_bulk() is
- *     "single-consumer". Otherwise, it is "multi-consumers".
+ *   Flags controlling the behavior of the mempool. See
+ *   rte_mempool_create() for details.
  * @param vaddr
  *   Virtual address of the externally allocated memory buffer.
  *   Will be used to store mempool objects.
@@ -624,110 +722,219 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
  *   LOG2 of the physical pages size.
  * @return
  *   The pointer to the new allocated mempool, on success. NULL on error
- *   with rte_errno set appropriately. Possible rte_errno values include:
- *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
- *    - E_RTE_SECONDARY - function was called from a secondary process instance
- *    - EINVAL - cache size provided is too large
- *    - ENOSPC - the maximum number of memzones has already been allocated
- *    - EEXIST - a memzone with the same name already exists
- *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *   with rte_errno set appropriately. See rte_mempool_create() for details.
  */
 struct rte_mempool *
 rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		unsigned cache_size, unsigned private_data_size,
 		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+		rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
 		int socket_id, unsigned flags, void *vaddr,
 		const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
 
 /**
- * Create a new mempool named *name* in memory on Xen Dom0.
+ * Create an empty mempool
  *
- * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The
- * pool contains n elements of elt_size. Its size is set to n.
- * All elements of the mempool are allocated together with the mempool header,
- * and memory buffer can consist of set of disjoint physical pages.
+ * The mempool is allocated and initialized, but it is not populated: no
+ * memory is allocated for the mempool elements. The user has to call
+ * rte_mempool_populate_*() or to add memory chunks to the pool. Once
+ * populated, the user may also want to initialize each object with
+ * rte_mempool_obj_iter().
  *
  * @param name
  *   The name of the mempool.
  * @param n
- *   The number of elements in the mempool. The optimum size (in terms of
- *   memory usage) for a mempool is when n is a power of two minus one:
- *   n = (2^q - 1).
+ *   The maximum number of elements that can be added in the mempool.
+ *   The optimum size (in terms of memory usage) for a mempool is when n
+ *   is a power of two minus one: n = (2^q - 1).
  * @param elt_size
  *   The size of each element.
  * @param cache_size
- *   If cache_size is non-zero, the rte_mempool library will try to
- *   limit the accesses to the common lockless pool, by maintaining a
- *   per-lcore object cache. This argument must be lower or equal to
- *   CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
- *   cache_size to have "n modulo cache_size == 0": if this is
- *   not the case, some elements will always stay in the pool and will
- *   never be used. The access to the per-lcore table is of course
- *   faster than the multi-producer/consumer pool. The cache can be
- *   disabled if the cache_size argument is set to 0; it can be useful to
- *   avoid losing objects in cache. Note that even if not used, the
- *   memory space for cache is always reserved in a mempool structure,
- *   except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ *   Size of the cache. See rte_mempool_create() for details.
  * @param private_data_size
  *   The size of the private data appended after the mempool
  *   structure. This is useful for storing some private data after the
  *   mempool structure, as is done for rte_mbuf_pool for example.
- * @param mp_init
- *   A function pointer that is called for initialization of the pool,
- *   before object initialization. The user can initialize the private
- *   data in this function if needed. This parameter can be NULL if
- *   not needed.
- * @param mp_init_arg
- *   An opaque pointer to data that can be used in the mempool
- *   constructor function.
- * @param obj_init
- *   A function pointer that is called for each object at
- *   initialization of the pool. The user can set some meta data in
- *   objects if needed. This parameter can be NULL if not needed.
- *   The obj_init() function takes the mempool pointer, the init_arg,
- *   the object pointer and the object number as parameters.
- * @param obj_init_arg
- *   An opaque pointer to data that can be used as an argument for
- *   each call to the object constructor function.
  * @param socket_id
  *   The *socket_id* argument is the socket identifier in the case of
  *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
  *   constraint for the reserved zone.
  * @param flags
- *   The *flags* arguments is an OR of following flags:
- *   - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
- *     between channels in RAM: the pool allocator will add padding
- *     between objects depending on the hardware configuration. See
- *     Memory alignment constraints for details. If this flag is set,
- *     the allocator will just align them to a cache line.
- *   - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
- *     cache-aligned. This flag removes this constraint, and no
- *     padding will be present between objects. This flag implies
- *     MEMPOOL_F_NO_SPREAD.
- *   - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
- *     when using rte_mempool_put() or rte_mempool_put_bulk() is
- *     "single-producer". Otherwise, it is "multi-producers".
- *   - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
- *     when using rte_mempool_get() or rte_mempool_get_bulk() is
- *     "single-consumer". Otherwise, it is "multi-consumers".
+ *   Flags controlling the behavior of the mempool. See
+ *   rte_mempool_create() for details.
  * @return
  *   The pointer to the new allocated mempool, on success. NULL on error
- *   with rte_errno set appropriately. Possible rte_errno values include:
- *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
- *    - E_RTE_SECONDARY - function was called from a secondary process instance
- *    - EINVAL - cache size provided is too large
- *    - ENOSPC - the maximum number of memzones has already been allocated
- *    - EEXIST - a memzone with the same name already exists
- *    - ENOMEM - no appropriate memory area found in which to create memzone
+ *   with rte_errno set appropriately. See rte_mempool_create() for details.
  */
 struct rte_mempool *
-rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size,
-		unsigned cache_size, unsigned private_data_size,
-		rte_mempool_ctor_t *mp_init, void *mp_init_arg,
-		rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
-		int socket_id, unsigned flags);
+rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
+	unsigned cache_size, unsigned private_data_size,
+	int socket_id, unsigned flags);
+/**
+ * Free a mempool
+ *
+ * Unlink the mempool from global list, free the memory chunks, and all
+ * memory referenced by the mempool. The objects must not be used by
+ * other cores as they will be freed.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ */
+void
+rte_mempool_free(struct rte_mempool *mp);
+
+/**
+ * Add physically contiguous memory for objects in the pool at init
+ *
+ * Add a virtually and physically contiguous memory chunk in the pool
+ * where objects can be instanciated.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param vaddr
+ *   The virtual address of memory that should be used to store objects.
+ * @param paddr
+ *   The physical address
+ * @param len
+ *   The length of memory in bytes.
+ * @param free_cb
+ *   The callback used to free this chunk when destroying the mempool.
+ * @param opaque
+ *   An opaque argument passed to free_cb.
+ * @return
+ *   The number of objects added on success.
+ *   On error, the chunk is not added in the memory list of the
+ *   mempool and a negative errno is returned.
+ */
+int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
+	phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
+	void *opaque);
 
+/**
+ * Add physical memory for objects in the pool at init
+ *
+ * Add a virtually contiguous memory chunk in the pool where objects can
+ * be instanciated. The physical addresses corresponding to the virtual
+ * area are described in paddr[], pg_num, pg_shift.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param vaddr
+ *   The virtual address of memory that should be used to store objects.
+ * @param paddr
+ *   An array of physical addresses of each page composing the virtual
+ *   area.
+ * @param pg_num
+ *   Number of elements in the paddr array.
+ * @param pg_shift
+ *   LOG2 of the physical pages size.
+ * @param free_cb
+ *   The callback used to free this chunk when destroying the mempool.
+ * @param opaque
+ *   An opaque argument passed to free_cb.
+ * @return
+ *   The number of objects added on success.
+ *   On error, the chunks are not added in the memory list of the
+ *   mempool and a negative errno is returned.
+ */
+int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+	rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
+
+/**
+ * Add virtually contiguous memory for objects in the pool at init
+ *
+ * Add a virtually contiguous memory chunk in the pool where objects can
+ * be instanciated.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param addr
+ *   The virtual address of memory that should be used to store objects.
+ *   Must be page-aligned.
+ * @param len
+ *   The length of memory in bytes. Must be page-aligned.
+ * @param pg_sz
+ *   The size of memory pages in this virtual area.
+ * @param free_cb
+ *   The callback used to free this chunk when destroying the mempool.
+ * @param opaque
+ *   An opaque argument passed to free_cb.
+ * @return
+ *   The number of objects added on success.
+ *   On error, the chunk is not added in the memory list of the
+ *   mempool and a negative errno is returned.
+ */
+int
+rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
+	size_t len, size_t pg_sz, rte_mempool_memchunk_free_cb_t *free_cb,
+	void *opaque);
+
+/**
+ * Add memory for objects in the pool at init
+ *
+ * This is the default function used by rte_mempool_create() to populate
+ * the mempool. It adds memory allocated using rte_memzone_reserve().
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of objects added on success.
+ *   On error, the chunk is not added in the memory list of the
+ *   mempool and a negative errno is returned.
+ */
+int rte_mempool_populate_default(struct rte_mempool *mp);
+
+/**
+ * Add memory from anonymous mapping for objects in the pool at init
+ *
+ * This function mmap an anonymous memory zone that is locked in
+ * memory to store the objects of the mempool.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of objects added on success.
+ *   On error, the chunk is not added in the memory list of the
+ *   mempool and a negative errno is returned.
+ */
+int rte_mempool_populate_anon(struct rte_mempool *mp);
+
+/**
+ * Call a function for each mempool element
+ *
+ * Iterate across all objects attached to a rte_mempool and call the
+ * callback function on it.
+ *
+ * @param mp
+ *   A pointer to an initialized mempool.
+ * @param obj_cb
+ *   A function pointer that is called for each object.
+ * @param obj_cb_arg
+ *   An opaque pointer passed to the callback function.
+ * @return
+ *   Number of objects iterated.
+ */
+uint32_t rte_mempool_obj_iter(struct rte_mempool *mp,
+	rte_mempool_obj_cb_t *obj_cb, void *obj_cb_arg);
+
+/**
+ * Call a function for each mempool memory chunk
+ *
+ * Iterate across all memory chunks attached to a rte_mempool and call
+ * the callback function on it.
+ *
+ * @param mp
+ *   A pointer to an initialized mempool.
+ * @param mem_cb
+ *   A function pointer that is called for each memory chunk.
+ * @param mem_cb_arg
+ *   An opaque pointer passed to the callback function.
+ * @return
+ *   Number of memory chunks iterated.
+ */
+uint32_t rte_mempool_mem_iter(struct rte_mempool *mp,
+	rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg);
 
 /**
  * Dump the status of the mempool to the console.
@@ -737,7 +944,71 @@ rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size,
  * @param mp
  *   A pointer to the mempool structure.
  */
-void rte_mempool_dump(FILE *f, const struct rte_mempool *mp);
+void rte_mempool_dump(FILE *f, struct rte_mempool *mp);
+
+/**
+ * Create a user-owned mempool cache.
+ *
+ * This can be used by non-EAL threads to enable caching when they
+ * interact with a mempool.
+ *
+ * @param size
+ *   The size of the mempool cache. See rte_mempool_create()'s cache_size
+ *   parameter description for more information. The same limits and
+ *   considerations apply here too.
+ * @param socket_id
+ *   The socket identifier in the case of NUMA. The value can be
+ *   SOCKET_ID_ANY if there is no NUMA constraint for the reserved zone.
+ */
+struct rte_mempool_cache *
+rte_mempool_cache_create(uint32_t size, int socket_id);
+
+/**
+ * Free a user-owned mempool cache.
+ *
+ * @param cache
+ *   A pointer to the mempool cache.
+ */
+void
+rte_mempool_cache_free(struct rte_mempool_cache *cache);
+
+/**
+ * Flush a user-owned mempool cache to the specified mempool.
+ *
+ * @param cache
+ *   A pointer to the mempool cache.
+ * @param mp
+ *   A pointer to the mempool.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_cache_flush(struct rte_mempool_cache *cache,
+			struct rte_mempool *mp)
+{
+	rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
+	cache->len = 0;
+}
+
+/**
+ * Get a pointer to the per-lcore default mempool cache.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param lcore_id
+ *   The logical core id.
+ * @return
+ *   A pointer to the mempool cache or NULL if disabled or non-EAL thread.
+ */
+static inline struct rte_mempool_cache *__attribute__((always_inline))
+rte_mempool_default_cache(struct rte_mempool *mp, unsigned lcore_id)
+{
+	if (mp->cache_size == 0)
+		return NULL;
+
+	if (lcore_id >= RTE_MAX_LCORE)
+		return NULL;
+
+	return &mp->local_cache[lcore_id];
+}
 
 /**
  * @internal Put several objects back in the mempool; used internally.
@@ -748,36 +1019,29 @@ void rte_mempool_dump(FILE *f, const struct rte_mempool *mp);
  * @param n
  *   The number of objects to store back in the mempool, must be strictly
  *   positive.
- * @param is_mp
- *   Mono-producer (0) or multi-producers (1).
+ * @param cache
+ *   A pointer to a mempool cache structure. May be NULL if not needed.
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
  */
 static inline void __attribute__((always_inline))
-__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
-		    unsigned n, int is_mp)
+__mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+		      unsigned n, struct rte_mempool_cache *cache, int flags)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	struct rte_mempool_cache *cache;
-	uint32_t index;
 	void **cache_objs;
-	unsigned lcore_id = rte_lcore_id();
-	uint32_t cache_size = mp->cache_size;
-	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/* cache is not enabled or single producer or non-EAL thread */
-	if (unlikely(cache_size == 0 || is_mp == 0 ||
-		     lcore_id >= RTE_MAX_LCORE))
+	/* No cache provided or single producer */
+	if (unlikely(cache == NULL || flags & MEMPOOL_F_SP_PUT))
 		goto ring_enqueue;
 
 	/* Go straight to ring if put would overflow mem allocated for cache */
 	if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE))
 		goto ring_enqueue;
 
-	cache = &mp->local_cache[lcore_id];
 	cache_objs = &cache->objs[cache->len];
 
 	/*
@@ -788,42 +1052,55 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	 */
 
 	/* Add elements back into the cache */
-	for (index = 0; index < n; ++index, obj_table++)
-		cache_objs[index] = *obj_table;
+	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
 
 	cache->len += n;
 
-	if (cache->len >= flushthresh) {
-		rte_ring_mp_enqueue_bulk(mp->ring, &cache->objs[cache_size],
-				cache->len - cache_size);
-		cache->len = cache_size;
+	if (cache->len >= cache->flushthresh) {
+		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
+				cache->len - cache->size);
+		cache->len = cache->size;
 	}
 
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-	if (is_mp) {
-		if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0)
-			rte_panic("cannot put objects in mempool\n");
-	}
-	else {
-		if (rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n) < 0)
-			rte_panic("cannot put objects in mempool\n");
-	}
+	if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0)
+		rte_panic("cannot put objects in mempool\n");
 #else
-	if (is_mp)
-		rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n);
-	else
-		rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n);
+	rte_mempool_ops_enqueue_bulk(mp, obj_table, n);
 #endif
 }
 
 
 /**
+ * Put several objects back in the mempool.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects).
+ * @param n
+ *   The number of objects to add in the mempool from the obj_table.
+ * @param cache
+ *   A pointer to a mempool cache structure. May be NULL if not needed.
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-producer (MEMPOOL_F_SP_PUT flag) or multi-producers.
+ */
+static inline void __attribute__((always_inline))
+rte_mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
+			unsigned n, struct rte_mempool_cache *cache, int flags)
+{
+	__mempool_check_cookies(mp, obj_table, n, 0);
+	__mempool_generic_put(mp, obj_table, n, cache, flags);
+}
+
+/**
+ * @deprecated
  * Put several objects back in the mempool (multi-producers safe).
  *
  * @param mp
@@ -833,15 +1110,18 @@ ring_enqueue:
  * @param n
  *   The number of objects to add in the mempool from the obj_table.
  */
+__rte_deprecated
 static inline void __attribute__((always_inline))
 rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 			unsigned n)
 {
-	__mempool_check_cookies(mp, obj_table, n, 0);
-	__mempool_put_bulk(mp, obj_table, n, 1);
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	rte_mempool_generic_put(mp, obj_table, n, cache, 0);
 }
 
 /**
+ * @deprecated
  * Put several objects back in the mempool (NOT multi-producers safe).
  *
  * @param mp
@@ -851,12 +1131,12 @@ rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
  * @param n
  *   The number of objects to add in the mempool from obj_table.
  */
-static inline void
+__rte_deprecated
+static inline void __attribute__((always_inline))
 rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 			unsigned n)
 {
-	__mempool_check_cookies(mp, obj_table, n, 0);
-	__mempool_put_bulk(mp, obj_table, n, 0);
+	rte_mempool_generic_put(mp, obj_table, n, NULL, MEMPOOL_F_SP_PUT);
 }
 
 /**
@@ -877,11 +1157,13 @@ static inline void __attribute__((always_inline))
 rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		     unsigned n)
 {
-	__mempool_check_cookies(mp, obj_table, n, 0);
-	__mempool_put_bulk(mp, obj_table, n, !(mp->flags & MEMPOOL_F_SP_PUT));
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	rte_mempool_generic_put(mp, obj_table, n, cache, mp->flags);
 }
 
 /**
+ * @deprecated
  * Put one object in the mempool (multi-producers safe).
  *
  * @param mp
@@ -889,13 +1171,17 @@ rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
  * @param obj
  *   A pointer to the object to be added.
  */
+__rte_deprecated
 static inline void __attribute__((always_inline))
 rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
 {
-	rte_mempool_mp_put_bulk(mp, &obj, 1);
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	rte_mempool_generic_put(mp, &obj, 1, cache, 0);
 }
 
 /**
+ * @deprecated
  * Put one object back in the mempool (NOT multi-producers safe).
  *
  * @param mp
@@ -903,10 +1189,11 @@ rte_mempool_mp_put(struct rte_mempool *mp, void *obj)
  * @param obj
  *   A pointer to the object to be added.
  */
+__rte_deprecated
 static inline void __attribute__((always_inline))
 rte_mempool_sp_put(struct rte_mempool *mp, void *obj)
 {
-	rte_mempool_sp_put_bulk(mp, &obj, 1);
+	rte_mempool_generic_put(mp, &obj, 1, NULL, MEMPOOL_F_SP_PUT);
 }
 
 /**
@@ -935,39 +1222,38 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
  *   A pointer to a table of void * pointers (objects).
  * @param n
  *   The number of objects to get, must be strictly positive.
- * @param is_mc
- *   Mono-consumer (0) or multi-consumers (1).
+ * @param cache
+ *   A pointer to a mempool cache structure. May be NULL if not needed.
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
  * @return
  *   - >=0: Success; number of objects supplied.
  *   - <0: Error; code of ring dequeue function.
  */
 static inline int __attribute__((always_inline))
-__mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
-		   unsigned n, int is_mc)
+__mempool_generic_get(struct rte_mempool *mp, void **obj_table,
+		      unsigned n, struct rte_mempool_cache *cache, int flags)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
-	unsigned lcore_id = rte_lcore_id();
-	uint32_t cache_size = mp->cache_size;
 
-	/* cache is not enabled or single consumer */
-	if (unlikely(cache_size == 0 || is_mc == 0 ||
-		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
+	/* No cache provided or single consumer */
+	if (unlikely(cache == NULL || flags & MEMPOOL_F_SC_GET ||
+		     n >= cache->size))
 		goto ring_dequeue;
 
-	cache = &mp->local_cache[lcore_id];
 	cache_objs = cache->objs;
 
 	/* Can this be satisfied from the cache? */
 	if (cache->len < n) {
 		/* No. Backfill the cache first, and then fill from it */
-		uint32_t req = n + (cache_size - cache->len);
+		uint32_t req = n + (cache->size - cache->len);
 
 		/* How many do we require i.e. number to fill the cache + the request */
-		ret = rte_ring_mc_dequeue_bulk(mp->ring, &cache->objs[cache->len], req);
+		ret = rte_mempool_ops_dequeue_bulk(mp,
+			&cache->objs[cache->len], req);
 		if (unlikely(ret < 0)) {
 			/*
 			 * In the offchance that we are buffer constrained,
@@ -992,13 +1278,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
-	if (is_mc)
-		ret = rte_ring_mc_dequeue_bulk(mp->ring, obj_table, n);
-	else
-		ret = rte_ring_sc_dequeue_bulk(mp->ring, obj_table, n);
+	ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n);
 
 	if (ret < 0)
 		__MEMPOOL_STAT_ADD(mp, get_fail, n);
@@ -1009,7 +1291,7 @@ ring_dequeue:
 }
 
 /**
- * Get several objects from the mempool (multi-consumers safe).
+ * Get several objects from the mempool.
  *
  * If cache is enabled, objects will be retrieved first from cache,
  * subsequently from the common pool. Note that it can return -ENOENT when
@@ -1022,21 +1304,56 @@ ring_dequeue:
  *   A pointer to a table of void * pointers (objects) that will be filled.
  * @param n
  *   The number of objects to get from mempool to obj_table.
+ * @param cache
+ *   A pointer to a mempool cache structure. May be NULL if not needed.
+ * @param flags
+ *   The flags used for the mempool creation.
+ *   Single-consumer (MEMPOOL_F_SC_GET flag) or multi-consumers.
  * @return
  *   - 0: Success; objects taken.
  *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
  */
 static inline int __attribute__((always_inline))
-rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+rte_mempool_generic_get(struct rte_mempool *mp, void **obj_table, unsigned n,
+			struct rte_mempool_cache *cache, int flags)
 {
 	int ret;
-	ret = __mempool_get_bulk(mp, obj_table, n, 1);
+	ret = __mempool_generic_get(mp, obj_table, n, cache, flags);
 	if (ret == 0)
 		__mempool_check_cookies(mp, obj_table, n, 1);
 	return ret;
 }
 
 /**
+ * @deprecated
+ * Get several objects from the mempool (multi-consumers safe).
+ *
+ * If cache is enabled, objects will be retrieved first from cache,
+ * subsequently from the common pool. Note that it can return -ENOENT when
+ * the local cache and common pool are empty, even if cache from other
+ * lcores are full.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @param obj_table
+ *   A pointer to a table of void * pointers (objects) that will be filled.
+ * @param n
+ *   The number of objects to get from mempool to obj_table.
+ * @return
+ *   - 0: Success; objects taken.
+ *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
+ */
+__rte_deprecated
+static inline int __attribute__((always_inline))
+rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	return rte_mempool_generic_get(mp, obj_table, n, cache, 0);
+}
+
+/**
+ * @deprecated
  * Get several objects from the mempool (NOT multi-consumers safe).
  *
  * If cache is enabled, objects will be retrieved first from cache,
@@ -1055,14 +1372,12 @@ rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
  *   - -ENOENT: Not enough entries in the mempool; no object is
  *     retrieved.
  */
+__rte_deprecated
 static inline int __attribute__((always_inline))
 rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
 {
-	int ret;
-	ret = __mempool_get_bulk(mp, obj_table, n, 0);
-	if (ret == 0)
-		__mempool_check_cookies(mp, obj_table, n, 1);
-	return ret;
+	return rte_mempool_generic_get(mp, obj_table, n, NULL,
+				       MEMPOOL_F_SC_GET);
 }
 
 /**
@@ -1090,15 +1405,13 @@ rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
 static inline int __attribute__((always_inline))
 rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
 {
-	int ret;
-	ret = __mempool_get_bulk(mp, obj_table, n,
-				 !(mp->flags & MEMPOOL_F_SC_GET));
-	if (ret == 0)
-		__mempool_check_cookies(mp, obj_table, n, 1);
-	return ret;
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	return rte_mempool_generic_get(mp, obj_table, n, cache, mp->flags);
 }
 
 /**
+ * @deprecated
  * Get one object from the mempool (multi-consumers safe).
  *
  * If cache is enabled, objects will be retrieved first from cache,
@@ -1114,13 +1427,17 @@ rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n)
  *   - 0: Success; objects taken.
  *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
  */
+__rte_deprecated
 static inline int __attribute__((always_inline))
 rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p)
 {
-	return rte_mempool_mc_get_bulk(mp, obj_p, 1);
+	struct rte_mempool_cache *cache;
+	cache = rte_mempool_default_cache(mp, rte_lcore_id());
+	return rte_mempool_generic_get(mp, obj_p, 1, cache, 0);
 }
 
 /**
+ * @deprecated
  * Get one object from the mempool (NOT multi-consumers safe).
  *
  * If cache is enabled, objects will be retrieved first from cache,
@@ -1136,10 +1453,11 @@ rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p)
  *   - 0: Success; objects taken.
  *   - -ENOENT: Not enough entries in the mempool; no object is retrieved.
  */
+__rte_deprecated
 static inline int __attribute__((always_inline))
 rte_mempool_sc_get(struct rte_mempool *mp, void **obj_p)
 {
-	return rte_mempool_sc_get_bulk(mp, obj_p, 1);
+	return rte_mempool_generic_get(mp, obj_p, 1, NULL, MEMPOOL_F_SC_GET);
 }
 
 /**
@@ -1173,6 +1491,21 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p)
  *
  * When cache is enabled, this function has to browse the length of
  * all lcores, so it should not be used in a data path, but only for
+ * debug purposes. User-owned mempool caches are not accounted for.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of entries in the mempool.
+ */
+unsigned int rte_mempool_avail_count(const struct rte_mempool *mp);
+
+/**
+ * @deprecated
+ * Return the number of entries in the mempool.
+ *
+ * When cache is enabled, this function has to browse the length of
+ * all lcores, so it should not be used in a data path, but only for
  * debug purposes.
  *
  * @param mp
@@ -1180,9 +1513,26 @@ rte_mempool_get(struct rte_mempool *mp, void **obj_p)
  * @return
  *   The number of entries in the mempool.
  */
+__rte_deprecated
 unsigned rte_mempool_count(const struct rte_mempool *mp);
 
 /**
+ * Return the number of elements which have been allocated from the mempool
+ *
+ * When cache is enabled, this function has to browse the length of
+ * all lcores, so it should not be used in a data path, but only for
+ * debug purposes.
+ *
+ * @param mp
+ *   A pointer to the mempool structure.
+ * @return
+ *   The number of free entries in the mempool.
+ */
+unsigned int
+rte_mempool_in_use_count(const struct rte_mempool *mp);
+
+/**
+ * @deprecated
  * Return the number of free entries in the mempool ring.
  * i.e. how many entries can be freed back to the mempool.
  *
@@ -1192,17 +1542,18 @@ unsigned rte_mempool_count(const struct rte_mempool *mp);
  *
  * When cache is enabled, this function has to browse the length of
  * all lcores, so it should not be used in a data path, but only for
- * debug purposes.
+ * debug purposes. User-owned mempool caches are not accounted for.
  *
  * @param mp
  *   A pointer to the mempool structure.
  * @return
  *   The number of free entries in the mempool.
  */
+__rte_deprecated
 static inline unsigned
 rte_mempool_free_count(const struct rte_mempool *mp)
 {
-	return mp->size - rte_mempool_count(mp);
+	return rte_mempool_in_use_count(mp);
 }
 
 /**
@@ -1210,7 +1561,7 @@ rte_mempool_free_count(const struct rte_mempool *mp)
  *
  * When cache is enabled, this function has to browse the length of all
  * lcores, so it should not be used in a data path, but only for debug
- * purposes.
+ * purposes. User-owned mempool caches are not accounted for.
  *
  * @param mp
  *   A pointer to the mempool structure.
@@ -1221,7 +1572,7 @@ rte_mempool_free_count(const struct rte_mempool *mp)
 static inline int
 rte_mempool_full(const struct rte_mempool *mp)
 {
-	return !!(rte_mempool_count(mp) == mp->size);
+	return !!(rte_mempool_avail_count(mp) == mp->size);
 }
 
 /**
@@ -1229,7 +1580,7 @@ rte_mempool_full(const struct rte_mempool *mp)
  *
  * When cache is enabled, this function has to browse the length of all
  * lcores, so it should not be used in a data path, but only for debug
- * purposes.
+ * purposes. User-owned mempool caches are not accounted for.
  *
  * @param mp
  *   A pointer to the mempool structure.
@@ -1240,7 +1591,7 @@ rte_mempool_full(const struct rte_mempool *mp)
 static inline int
 rte_mempool_empty(const struct rte_mempool *mp)
 {
-	return !!(rte_mempool_count(mp) == 0);
+	return !!(rte_mempool_avail_count(mp) == 0);
 }
 
 /**
@@ -1252,23 +1603,16 @@ rte_mempool_empty(const struct rte_mempool *mp)
  *   A pointer (virtual address) to the element of the pool.
  * @return
  *   The physical address of the elt element.
+ *   If the mempool was created with MEMPOOL_F_NO_PHYS_CONTIG, the
+ *   returned value is RTE_BAD_PHYS_ADDR.
  */
 static inline phys_addr_t
-rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt)
+rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
 {
-	if (rte_eal_has_hugepages()) {
-		uintptr_t off;
-
-		off = (const char *)elt - (const char *)mp->elt_va_start;
-		return mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask);
-	} else {
-		/*
-		 * If huge pages are disabled, we cannot assume the
-		 * memory region to be physically contiguous.
-		 * Lookup for each element.
-		 */
-		return rte_mem_virt2phy(elt);
-	}
+	const struct rte_mempool_objhdr *hdr;
+	hdr = (const struct rte_mempool_objhdr *)RTE_PTR_SUB(elt,
+		sizeof(*hdr));
+	return hdr->physaddr;
 }
 
 /**
@@ -1281,7 +1625,7 @@ rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt)
  * @param mp
  *   A pointer to the mempool structure.
  */
-void rte_mempool_audit(const struct rte_mempool *mp);
+void rte_mempool_audit(struct rte_mempool *mp);
 
 /**
  * Return a pointer to the private data in an mempool structure.
@@ -1293,7 +1637,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->cache_size);
 }
 
 /**
@@ -1325,7 +1670,7 @@ struct rte_mempool *rte_mempool_lookup(const char *name);
  * calculates header, trailer, body and total sizes of the mempool object.
  *
  * @param elt_size
- *   The size of each element.
+ *   The size of each element, without header and trailer.
  * @param flags
  *   The flags used for the mempool creation.
  *   Consult rte_mempool_create() for more information about possible values.
@@ -1351,14 +1696,15 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
  *
  * @param elt_num
  *   Number of elements.
- * @param elt_sz
- *   The size of each element.
+ * @param total_elt_sz
+ *   The size of each element, including header and trailer, as returned
+ *   by rte_mempool_calc_obj_size().
  * @param pg_shift
- *   LOG2 of the physical pages size.
+ *   LOG2 of the physical pages size. If set to 0, ignore page boundaries.
  * @return
  *   Required memory size aligned at page boundary.
  */
-size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
+size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
 	uint32_t pg_shift);
 
 /**
@@ -1372,8 +1718,9 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
  *   Will be used to store mempool objects.
  * @param elt_num
  *   Number of elements.
- * @param elt_sz
- *   The size of each element.
+ * @param total_elt_sz
+ *   The size of each element, including header and trailer, as returned
+ *   by rte_mempool_calc_obj_size().
  * @param paddr
  *   Array of physical addresses of the pages that comprises given memory
  *   buffer.
@@ -1387,8 +1734,9 @@ size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
  *   buffer is too small, return a negative value whose absolute value
  *   is the actual number of elements that can be stored in that buffer.
  */
-ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
-	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
+	size_t total_elt_sz, const phys_addr_t paddr[], uint32_t pg_num,
+	uint32_t pg_shift);
 
 /**
  * Walk list of all memory pools
@@ -1398,7 +1746,7 @@ ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
  * @param arg
  *   Argument passed to iterator
  */
-void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *arg),
+void rte_mempool_walk(void (*func)(struct rte_mempool *, void *arg),
 		      void *arg);
 
 #ifdef __cplusplus
diff --git a/lib/librte_mempool/rte_mempool_ops.c b/lib/librte_mempool/rte_mempool_ops.c
new file mode 100644
index 00000000..fd0b64cf
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ops.c
@@ -0,0 +1,151 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_mempool.h>
+#include <rte_errno.h>
+
+/* indirect jump table to support external memory pools. */
+struct rte_mempool_ops_table rte_mempool_ops_table = {
+	.sl =  RTE_SPINLOCK_INITIALIZER,
+	.num_ops = 0
+};
+
+/* add a new ops struct in rte_mempool_ops_table, return its index. */
+int
+rte_mempool_register_ops(const struct rte_mempool_ops *h)
+{
+	struct rte_mempool_ops *ops;
+	int16_t ops_index;
+
+	rte_spinlock_lock(&rte_mempool_ops_table.sl);
+
+	if (rte_mempool_ops_table.num_ops >=
+			RTE_MEMPOOL_MAX_OPS_IDX) {
+		rte_spinlock_unlock(&rte_mempool_ops_table.sl);
+		RTE_LOG(ERR, MEMPOOL,
+			"Maximum number of mempool ops structs exceeded\n");
+		return -ENOSPC;
+	}
+
+	if (h->alloc == NULL || h->enqueue == NULL ||
+			h->dequeue == NULL || h->get_count == NULL) {
+		rte_spinlock_unlock(&rte_mempool_ops_table.sl);
+		RTE_LOG(ERR, MEMPOOL,
+			"Missing callback while registering mempool ops\n");
+		return -EINVAL;
+	}
+
+	if (strlen(h->name) >= sizeof(ops->name) - 1) {
+		rte_spinlock_unlock(&rte_mempool_ops_table.sl);
+		RTE_LOG(DEBUG, EAL, "%s(): mempool_ops <%s>: name too long\n",
+				__func__, h->name);
+		rte_errno = EEXIST;
+		return -EEXIST;
+	}
+
+	ops_index = rte_mempool_ops_table.num_ops++;
+	ops = &rte_mempool_ops_table.ops[ops_index];
+	snprintf(ops->name, sizeof(ops->name), "%s", h->name);
+	ops->alloc = h->alloc;
+	ops->enqueue = h->enqueue;
+	ops->dequeue = h->dequeue;
+	ops->get_count = h->get_count;
+
+	rte_spinlock_unlock(&rte_mempool_ops_table.sl);
+
+	return ops_index;
+}
+
+/* wrapper to allocate an external mempool's private (pool) data. */
+int
+rte_mempool_ops_alloc(struct rte_mempool *mp)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+	return ops->alloc(mp);
+}
+
+/* wrapper to free an external pool ops. */
+void
+rte_mempool_ops_free(struct rte_mempool *mp)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+	if (ops->free == NULL)
+		return;
+	ops->free(mp);
+}
+
+/* wrapper to get available objects in an external mempool. */
+unsigned int
+rte_mempool_ops_get_count(const struct rte_mempool *mp)
+{
+	struct rte_mempool_ops *ops;
+
+	ops = rte_mempool_get_ops(mp->ops_index);
+	return ops->get_count(mp);
+}
+
+/* sets mempool ops previously registered by rte_mempool_register_ops. */
+int
+rte_mempool_set_ops_byname(struct rte_mempool *mp, const char *name,
+	void *pool_config)
+{
+	struct rte_mempool_ops *ops = NULL;
+	unsigned i;
+
+	/* too late, the mempool is already populated. */
+	if (mp->flags & MEMPOOL_F_POOL_CREATED)
+		return -EEXIST;
+
+	for (i = 0; i < rte_mempool_ops_table.num_ops; i++) {
+		if (!strcmp(name,
+				rte_mempool_ops_table.ops[i].name)) {
+			ops = &rte_mempool_ops_table.ops[i];
+			break;
+		}
+	}
+
+	if (ops == NULL)
+		return -EINVAL;
+
+	mp->ops_index = i;
+	mp->pool_config = pool_config;
+	return 0;
+}
diff --git a/lib/librte_mempool/rte_mempool_ring.c b/lib/librte_mempool/rte_mempool_ring.c
new file mode 100644
index 00000000..b9aa64dd
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_ring.c
@@ -0,0 +1,161 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+
+static int
+common_ring_mp_enqueue(struct rte_mempool *mp, void * const *obj_table,
+		unsigned n)
+{
+	return rte_ring_mp_enqueue_bulk(mp->pool_data, obj_table, n);
+}
+
+static int
+common_ring_sp_enqueue(struct rte_mempool *mp, void * const *obj_table,
+		unsigned n)
+{
+	return rte_ring_sp_enqueue_bulk(mp->pool_data, obj_table, n);
+}
+
+static int
+common_ring_mc_dequeue(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	return rte_ring_mc_dequeue_bulk(mp->pool_data, obj_table, n);
+}
+
+static int
+common_ring_sc_dequeue(struct rte_mempool *mp, void **obj_table, unsigned n)
+{
+	return rte_ring_sc_dequeue_bulk(mp->pool_data, obj_table, n);
+}
+
+static unsigned
+common_ring_get_count(const struct rte_mempool *mp)
+{
+	return rte_ring_count(mp->pool_data);
+}
+
+
+static int
+common_ring_alloc(struct rte_mempool *mp)
+{
+	int rg_flags = 0, ret;
+	char rg_name[RTE_RING_NAMESIZE];
+	struct rte_ring *r;
+
+	ret = snprintf(rg_name, sizeof(rg_name),
+		RTE_MEMPOOL_MZ_FORMAT, mp->name);
+	if (ret < 0 || ret >= (int)sizeof(rg_name)) {
+		rte_errno = ENAMETOOLONG;
+		return -rte_errno;
+	}
+
+	/* ring flags */
+	if (mp->flags & MEMPOOL_F_SP_PUT)
+		rg_flags |= RING_F_SP_ENQ;
+	if (mp->flags & MEMPOOL_F_SC_GET)
+		rg_flags |= RING_F_SC_DEQ;
+
+	/*
+	 * Allocate the ring that will be used to store objects.
+	 * Ring functions will return appropriate errors if we are
+	 * running as a secondary process etc., so no checks made
+	 * in this function for that condition.
+	 */
+	r = rte_ring_create(rg_name, rte_align32pow2(mp->size + 1),
+		mp->socket_id, rg_flags);
+	if (r == NULL)
+		return -rte_errno;
+
+	mp->pool_data = r;
+
+	return 0;
+}
+
+static void
+common_ring_free(struct rte_mempool *mp)
+{
+	rte_ring_free(mp->pool_data);
+}
+
+/*
+ * The following 4 declarations of mempool ops structs address
+ * the need for the backward compatible mempool handlers for
+ * single/multi producers and single/multi consumers as dictated by the
+ * flags provided to the rte_mempool_create function
+ */
+static const struct rte_mempool_ops ops_mp_mc = {
+	.name = "ring_mp_mc",
+	.alloc = common_ring_alloc,
+	.free = common_ring_free,
+	.enqueue = common_ring_mp_enqueue,
+	.dequeue = common_ring_mc_dequeue,
+	.get_count = common_ring_get_count,
+};
+
+static const struct rte_mempool_ops ops_sp_sc = {
+	.name = "ring_sp_sc",
+	.alloc = common_ring_alloc,
+	.free = common_ring_free,
+	.enqueue = common_ring_sp_enqueue,
+	.dequeue = common_ring_sc_dequeue,
+	.get_count = common_ring_get_count,
+};
+
+static const struct rte_mempool_ops ops_mp_sc = {
+	.name = "ring_mp_sc",
+	.alloc = common_ring_alloc,
+	.free = common_ring_free,
+	.enqueue = common_ring_mp_enqueue,
+	.dequeue = common_ring_sc_dequeue,
+	.get_count = common_ring_get_count,
+};
+
+static const struct rte_mempool_ops ops_sp_mc = {
+	.name = "ring_sp_mc",
+	.alloc = common_ring_alloc,
+	.free = common_ring_free,
+	.enqueue = common_ring_sp_enqueue,
+	.dequeue = common_ring_mc_dequeue,
+	.get_count = common_ring_get_count,
+};
+
+MEMPOOL_REGISTER_OPS(ops_mp_mc);
+MEMPOOL_REGISTER_OPS(ops_sp_sc);
+MEMPOOL_REGISTER_OPS(ops_mp_sc);
+MEMPOOL_REGISTER_OPS(ops_sp_mc);
diff --git a/lib/librte_mempool/rte_mempool_stack.c b/lib/librte_mempool/rte_mempool_stack.c
new file mode 100644
index 00000000..5fd8af24
--- /dev/null
+++ b/lib/librte_mempool/rte_mempool_stack.c
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <rte_mempool.h>
+#include <rte_malloc.h>
+
+struct rte_mempool_stack {
+	rte_spinlock_t sl;
+
+	uint32_t size;
+	uint32_t len;
+	void *objs[];
+};
+
+static int
+stack_alloc(struct rte_mempool *mp)
+{
+	struct rte_mempool_stack *s;
+	unsigned n = mp->size;
+	int size = sizeof(*s) + (n+16)*sizeof(void *);
+
+	/* Allocate our local memory structure */
+	s = rte_zmalloc_socket("mempool-stack",
+			size,
+			RTE_CACHE_LINE_SIZE,
+			mp->socket_id);
+	if (s == NULL) {
+		RTE_LOG(ERR, MEMPOOL, "Cannot allocate stack!\n");
+		return -ENOMEM;
+	}
+
+	rte_spinlock_init(&s->sl);
+
+	s->size = n;
+	mp->pool_data = s;
+
+	return 0;
+}
+
+static int
+stack_enqueue(struct rte_mempool *mp, void * const *obj_table,
+		unsigned n)
+{
+	struct rte_mempool_stack *s = mp->pool_data;
+	void **cache_objs;
+	unsigned index;
+
+	rte_spinlock_lock(&s->sl);
+	cache_objs = &s->objs[s->len];
+
+	/* Is there sufficient space in the stack ? */
+	if ((s->len + n) > s->size) {
+		rte_spinlock_unlock(&s->sl);
+		return -ENOBUFS;
+	}
+
+	/* Add elements back into the cache */
+	for (index = 0; index < n; ++index, obj_table++)
+		cache_objs[index] = *obj_table;
+
+	s->len += n;
+
+	rte_spinlock_unlock(&s->sl);
+	return 0;
+}
+
+static int
+stack_dequeue(struct rte_mempool *mp, void **obj_table,
+		unsigned n)
+{
+	struct rte_mempool_stack *s = mp->pool_data;
+	void **cache_objs;
+	unsigned index, len;
+
+	rte_spinlock_lock(&s->sl);
+
+	if (unlikely(n > s->len)) {
+		rte_spinlock_unlock(&s->sl);
+		return -ENOENT;
+	}
+
+	cache_objs = s->objs;
+
+	for (index = 0, len = s->len - 1; index < n;
+			++index, len--, obj_table++)
+		*obj_table = cache_objs[len];
+
+	s->len -= n;
+	rte_spinlock_unlock(&s->sl);
+	return n;
+}
+
+static unsigned
+stack_get_count(const struct rte_mempool *mp)
+{
+	struct rte_mempool_stack *s = mp->pool_data;
+
+	return s->len;
+}
+
+static void
+stack_free(struct rte_mempool *mp)
+{
+	rte_free((void *)(mp->pool_data));
+}
+
+static struct rte_mempool_ops ops_stack = {
+	.name = "stack",
+	.alloc = stack_alloc,
+	.free = stack_free,
+	.enqueue = stack_enqueue,
+	.dequeue = stack_dequeue,
+	.get_count = stack_get_count
+};
+
+MEMPOOL_REGISTER_OPS(ops_stack);
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 17151e08..dee1c990 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -1,7 +1,6 @@
 DPDK_2.0 {
 	global:
 
-	rte_dom0_mempool_create;
 	rte_mempool_audit;
 	rte_mempool_calc_obj_size;
 	rte_mempool_count;
@@ -9,7 +8,6 @@ DPDK_2.0 {
 	rte_mempool_dump;
 	rte_mempool_list_dump;
 	rte_mempool_lookup;
-	rte_mempool_obj_iter;
 	rte_mempool_walk;
 	rte_mempool_xmem_create;
 	rte_mempool_xmem_size;
@@ -17,3 +15,30 @@ DPDK_2.0 {
 
 	local: *;
 };
+
+DPDK_16.07 {
+	global:
+
+	rte_mempool_avail_count;
+	rte_mempool_cache_create;
+	rte_mempool_cache_flush;
+	rte_mempool_cache_free;
+	rte_mempool_check_cookies;
+	rte_mempool_create_empty;
+	rte_mempool_default_cache;
+	rte_mempool_free;
+	rte_mempool_generic_get;
+	rte_mempool_generic_put;
+	rte_mempool_in_use_count;
+	rte_mempool_mem_iter;
+	rte_mempool_obj_iter;
+	rte_mempool_ops_table;
+	rte_mempool_populate_anon;
+	rte_mempool_populate_default;
+	rte_mempool_populate_phys;
+	rte_mempool_populate_phys_tab;
+	rte_mempool_populate_virt;
+	rte_mempool_register_ops;
+	rte_mempool_set_ops_byname;
+
+} DPDK_2.0;
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
new file mode 100644
index 00000000..166441a2
--- /dev/null
+++ b/lib/librte_pdump/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_pdump.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+CFLAGS += -D_GNU_SOURCE
+LDLIBS += -lpthread
+
+EXPORT_MAP := rte_pdump_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_PDUMP)-include := rte_pdump.h
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += lib/librte_ether
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
new file mode 100644
index 00000000..ee566cb2
--- /dev/null
+++ b/lib/librte_pdump/rte_pdump.c
@@ -0,0 +1,959 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_memcpy.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_pci.h>
+
+#include "rte_pdump.h"
+
+#define SOCKET_PATH_VAR_RUN "/var/run"
+#define SOCKET_PATH_HOME "HOME"
+#define DPDK_DIR         "/.dpdk"
+#define SOCKET_DIR       "/pdump_sockets"
+#define SERVER_SOCKET "%s/pdump_server_socket"
+#define CLIENT_SOCKET "%s/pdump_client_socket_%d_%u"
+#define DEVICE_ID_SIZE 64
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_PDUMP RTE_LOGTYPE_USER1
+
+enum pdump_operation {
+	DISABLE = 1,
+	ENABLE = 2
+};
+
+enum pdump_version {
+	V1 = 1
+};
+
+static pthread_t pdump_thread;
+static int pdump_socket_fd;
+static char server_socket_dir[PATH_MAX];
+static char client_socket_dir[PATH_MAX];
+
+struct pdump_request {
+	uint16_t ver;
+	uint16_t op;
+	uint32_t flags;
+	union pdump_data {
+		struct enable_v1 {
+			char device[DEVICE_ID_SIZE];
+			uint16_t queue;
+			struct rte_ring *ring;
+			struct rte_mempool *mp;
+			void *filter;
+		} en_v1;
+		struct disable_v1 {
+			char device[DEVICE_ID_SIZE];
+			uint16_t queue;
+			struct rte_ring *ring;
+			struct rte_mempool *mp;
+			void *filter;
+		} dis_v1;
+	} data;
+};
+
+struct pdump_response {
+	uint16_t ver;
+	uint16_t res_op;
+	int32_t err_value;
+};
+
+static struct pdump_rxtx_cbs {
+	struct rte_ring *ring;
+	struct rte_mempool *mp;
+	struct rte_eth_rxtx_callback *cb;
+	void *filter;
+} rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
+tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+
+static inline int
+pdump_pktmbuf_copy_data(struct rte_mbuf *seg, const struct rte_mbuf *m)
+{
+	if (rte_pktmbuf_tailroom(seg) < m->data_len) {
+		RTE_LOG(ERR, PDUMP,
+			"User mempool: insufficient data_len of mbuf\n");
+		return -EINVAL;
+	}
+
+	seg->port = m->port;
+	seg->vlan_tci = m->vlan_tci;
+	seg->hash = m->hash;
+	seg->tx_offload = m->tx_offload;
+	seg->ol_flags = m->ol_flags;
+	seg->packet_type = m->packet_type;
+	seg->vlan_tci_outer = m->vlan_tci_outer;
+	seg->data_len = m->data_len;
+	seg->pkt_len = seg->data_len;
+	rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+			rte_pktmbuf_mtod(m, void *),
+			rte_pktmbuf_data_len(seg));
+
+	return 0;
+}
+
+static inline struct rte_mbuf *
+pdump_pktmbuf_copy(struct rte_mbuf *m, struct rte_mempool *mp)
+{
+	struct rte_mbuf *m_dup, *seg, **prev;
+	uint32_t pktlen;
+	uint8_t nseg;
+
+	m_dup = rte_pktmbuf_alloc(mp);
+	if (unlikely(m_dup == NULL))
+		return NULL;
+
+	seg = m_dup;
+	prev = &seg->next;
+	pktlen = m->pkt_len;
+	nseg = 0;
+
+	do {
+		nseg++;
+		if (pdump_pktmbuf_copy_data(seg, m) < 0) {
+			rte_pktmbuf_free(m_dup);
+			return NULL;
+		}
+		*prev = seg;
+		prev = &seg->next;
+	} while ((m = m->next) != NULL &&
+			(seg = rte_pktmbuf_alloc(mp)) != NULL);
+
+	*prev = NULL;
+	m_dup->nb_segs = nseg;
+	m_dup->pkt_len = pktlen;
+
+	/* Allocation of new indirect segment failed */
+	if (unlikely(seg == NULL)) {
+		rte_pktmbuf_free(m_dup);
+		return NULL;
+	}
+
+	__rte_mbuf_sanity_check(m_dup, 1);
+	return m_dup;
+}
+
+static inline void
+pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
+{
+	unsigned i;
+	int ring_enq;
+	uint16_t d_pkts = 0;
+	struct rte_mbuf *dup_bufs[nb_pkts];
+	struct pdump_rxtx_cbs *cbs;
+	struct rte_ring *ring;
+	struct rte_mempool *mp;
+	struct rte_mbuf *p;
+
+	cbs  = user_params;
+	ring = cbs->ring;
+	mp = cbs->mp;
+	for (i = 0; i < nb_pkts; i++) {
+		p = pdump_pktmbuf_copy(pkts[i], mp);
+		if (p)
+			dup_bufs[d_pkts++] = p;
+	}
+
+	ring_enq = rte_ring_enqueue_burst(ring, (void *)dup_bufs, d_pkts);
+	if (unlikely(ring_enq < d_pkts)) {
+		RTE_LOG(DEBUG, PDUMP,
+			"only %d of packets enqueued to ring\n", ring_enq);
+		do {
+			rte_pktmbuf_free(dup_bufs[ring_enq]);
+		} while (++ring_enq < d_pkts);
+	}
+}
+
+static uint16_t
+pdump_rx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+	struct rte_mbuf **pkts, uint16_t nb_pkts,
+	uint16_t max_pkts __rte_unused,
+	void *user_params)
+{
+	pdump_copy(pkts, nb_pkts, user_params);
+	return nb_pkts;
+}
+
+static uint16_t
+pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+		struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
+{
+	pdump_copy(pkts, nb_pkts, user_params);
+	return nb_pkts;
+}
+
+static int
+pdump_get_dombdf(char *device_id, char *domBDF, size_t len)
+{
+	int ret;
+	struct rte_pci_addr dev_addr = {0};
+
+	/* identify if device_id is pci address or name */
+	ret = eal_parse_pci_DomBDF(device_id, &dev_addr);
+	if (ret < 0)
+		return -1;
+
+	if (dev_addr.domain)
+		ret = snprintf(domBDF, len, "%u:%u:%u.%u", dev_addr.domain,
+				dev_addr.bus, dev_addr.devid,
+				dev_addr.function);
+	else
+		ret = snprintf(domBDF, len, "%u:%u.%u", dev_addr.bus,
+				dev_addr.devid,
+				dev_addr.function);
+
+	return ret;
+}
+
+static int
+pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
+				struct rte_ring *ring, struct rte_mempool *mp,
+				uint16_t operation)
+{
+	uint16_t qid;
+	struct pdump_rxtx_cbs *cbs = NULL;
+
+	qid = (queue == RTE_PDUMP_ALL_QUEUES) ? 0 : queue;
+	for (; qid < end_q; qid++) {
+		cbs = &rx_cbs[port][qid];
+		if (cbs && operation == ENABLE) {
+			if (cbs->cb) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to add rx callback for port=%d "
+					"and queue=%d, callback already exists\n",
+					port, qid);
+				return -EEXIST;
+			}
+			cbs->ring = ring;
+			cbs->mp = mp;
+			cbs->cb = rte_eth_add_first_rx_callback(port, qid,
+								pdump_rx, cbs);
+			if (cbs->cb == NULL) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to add rx callback, errno=%d\n",
+					rte_errno);
+				return rte_errno;
+			}
+		}
+		if (cbs && operation == DISABLE) {
+			int ret;
+
+			if (cbs->cb == NULL) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to delete non existing rx "
+					"callback for port=%d and queue=%d\n",
+					port, qid);
+				return -EINVAL;
+			}
+			ret = rte_eth_remove_rx_callback(port, qid, cbs->cb);
+			if (ret < 0) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to remove rx callback, errno=%d\n",
+					rte_errno);
+				return ret;
+			}
+			cbs->cb = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static int
+pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
+				struct rte_ring *ring, struct rte_mempool *mp,
+				uint16_t operation)
+{
+
+	uint16_t qid;
+	struct pdump_rxtx_cbs *cbs = NULL;
+
+	qid = (queue == RTE_PDUMP_ALL_QUEUES) ? 0 : queue;
+	for (; qid < end_q; qid++) {
+		cbs = &tx_cbs[port][qid];
+		if (cbs && operation == ENABLE) {
+			if (cbs->cb) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to add tx callback for port=%d "
+					"and queue=%d, callback already exists\n",
+					port, qid);
+				return -EEXIST;
+			}
+			cbs->ring = ring;
+			cbs->mp = mp;
+			cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx,
+								cbs);
+			if (cbs->cb == NULL) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to add tx callback, errno=%d\n",
+					rte_errno);
+				return rte_errno;
+			}
+		}
+		if (cbs && operation == DISABLE) {
+			int ret;
+
+			if (cbs->cb == NULL) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to delete non existing tx "
+					"callback for port=%d and queue=%d\n",
+					port, qid);
+				return -EINVAL;
+			}
+			ret = rte_eth_remove_tx_callback(port, qid, cbs->cb);
+			if (ret < 0) {
+				RTE_LOG(ERR, PDUMP,
+					"failed to remove tx callback, errno=%d\n",
+					rte_errno);
+				return ret;
+			}
+			cbs->cb = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static int
+set_pdump_rxtx_cbs(struct pdump_request *p)
+{
+	uint16_t nb_rx_q, nb_tx_q = 0, end_q, queue;
+	uint8_t port;
+	int ret = 0;
+	uint32_t flags;
+	uint16_t operation;
+	struct rte_ring *ring;
+	struct rte_mempool *mp;
+
+	flags = p->flags;
+	operation = p->op;
+	if (operation == ENABLE) {
+		ret = rte_eth_dev_get_port_by_name(p->data.en_v1.device,
+				&port);
+		if (ret < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to get potid for device id=%s\n",
+				p->data.en_v1.device);
+			return -EINVAL;
+		}
+		queue = p->data.en_v1.queue;
+		ring = p->data.en_v1.ring;
+		mp = p->data.en_v1.mp;
+	} else {
+		ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device,
+				&port);
+		if (ret < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to get potid for device id=%s\n",
+				p->data.dis_v1.device);
+			return -EINVAL;
+		}
+		queue = p->data.dis_v1.queue;
+		ring = p->data.dis_v1.ring;
+		mp = p->data.dis_v1.mp;
+	}
+
+	/* validation if packet capture is for all queues */
+	if (queue == RTE_PDUMP_ALL_QUEUES) {
+		struct rte_eth_dev_info dev_info;
+
+		rte_eth_dev_info_get(port, &dev_info);
+		nb_rx_q = dev_info.nb_rx_queues;
+		nb_tx_q = dev_info.nb_tx_queues;
+		if (nb_rx_q == 0 && flags & RTE_PDUMP_FLAG_RX) {
+			RTE_LOG(ERR, PDUMP,
+				"number of rx queues cannot be 0\n");
+			return -EINVAL;
+		}
+		if (nb_tx_q == 0 && flags & RTE_PDUMP_FLAG_TX) {
+			RTE_LOG(ERR, PDUMP,
+				"number of tx queues cannot be 0\n");
+			return -EINVAL;
+		}
+		if ((nb_tx_q == 0 || nb_rx_q == 0) &&
+			flags == RTE_PDUMP_FLAG_RXTX) {
+			RTE_LOG(ERR, PDUMP,
+				"both tx&rx queues must be non zero\n");
+			return -EINVAL;
+		}
+	}
+
+	/* register RX callback */
+	if (flags & RTE_PDUMP_FLAG_RX) {
+		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
+		ret = pdump_regitser_rx_callbacks(end_q, port, queue, ring, mp,
+							operation);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* register TX callback */
+	if (flags & RTE_PDUMP_FLAG_TX) {
+		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
+		ret = pdump_regitser_tx_callbacks(end_q, port, queue, ring, mp,
+							operation);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/* get socket path (/var/run if root, $HOME otherwise) */
+static int
+pdump_get_socket_path(char *buffer, int bufsz, enum rte_pdump_socktype type)
+{
+	char dpdk_dir[PATH_MAX] = {0};
+	char dir[PATH_MAX] = {0};
+	char *dir_home = NULL;
+
+	if (type == RTE_PDUMP_SOCKET_SERVER && server_socket_dir[0] != 0)
+		snprintf(dir, sizeof(dir), "%s", server_socket_dir);
+	else if (type == RTE_PDUMP_SOCKET_CLIENT && client_socket_dir[0] != 0)
+		snprintf(dir, sizeof(dir), "%s", client_socket_dir);
+	else {
+		if (getuid() != 0) {
+			dir_home = getenv(SOCKET_PATH_HOME);
+			if (!dir_home) {
+				RTE_LOG(ERR, PDUMP,
+					"Failed to get environment variable"
+					" value for %s, %s:%d\n",
+					SOCKET_PATH_HOME, __func__, __LINE__);
+				return -1;
+			}
+			snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
+					dir_home, DPDK_DIR);
+		} else
+			snprintf(dpdk_dir, sizeof(dpdk_dir), "%s%s",
+					SOCKET_PATH_VAR_RUN, DPDK_DIR);
+
+		mkdir(dpdk_dir, 700);
+		snprintf(dir, sizeof(dir), "%s%s",
+					dpdk_dir, SOCKET_DIR);
+	}
+
+	mkdir(dir, 700);
+	if (type == RTE_PDUMP_SOCKET_SERVER)
+		snprintf(buffer, bufsz, SERVER_SOCKET, dir);
+	else
+		snprintf(buffer, bufsz, CLIENT_SOCKET, dir, getpid(),
+				rte_sys_gettid());
+
+	return 0;
+}
+
+static int
+pdump_create_server_socket(void)
+{
+	int ret, socket_fd;
+	struct sockaddr_un addr;
+	socklen_t addr_len;
+
+	ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
+				RTE_PDUMP_SOCKET_SERVER);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to get server socket path: %s:%d\n",
+			__func__, __LINE__);
+		return -1;
+	}
+	addr.sun_family = AF_UNIX;
+
+	/* remove if file already exists */
+	unlink(addr.sun_path);
+
+	/* set up a server socket */
+	socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to create server socket: %s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		return -1;
+	}
+
+	addr_len = sizeof(struct sockaddr_un);
+	ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len);
+	if (ret) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to bind to server socket: %s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		close(socket_fd);
+		return -1;
+	}
+
+	/* save the socket in local configuration */
+	pdump_socket_fd = socket_fd;
+
+	return 0;
+}
+
+static __attribute__((noreturn)) void *
+pdump_thread_main(__rte_unused void *arg)
+{
+	struct sockaddr_un cli_addr;
+	socklen_t cli_len;
+	struct pdump_request cli_req;
+	struct pdump_response resp;
+	int n;
+	int ret = 0;
+
+	/* host thread, never break out */
+	for (;;) {
+		/* recv client requests */
+		cli_len = sizeof(cli_addr);
+		n = recvfrom(pdump_socket_fd, &cli_req,
+				sizeof(struct pdump_request), 0,
+				(struct sockaddr *)&cli_addr, &cli_len);
+		if (n < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to recv from client:%s, %s:%d\n",
+				strerror(errno), __func__, __LINE__);
+			continue;
+		}
+
+		ret = set_pdump_rxtx_cbs(&cli_req);
+
+		resp.ver = cli_req.ver;
+		resp.res_op = cli_req.op;
+		resp.err_value = ret;
+		n = sendto(pdump_socket_fd, &resp,
+				sizeof(struct pdump_response),
+				0, (struct sockaddr *)&cli_addr, cli_len);
+		if (n < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to send to client:%s, %s:%d\n",
+				strerror(errno), __func__, __LINE__);
+		}
+	}
+}
+
+int
+rte_pdump_init(const char *path)
+{
+	int ret = 0;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	ret = rte_pdump_set_socket_dir(path, RTE_PDUMP_SOCKET_SERVER);
+	if (ret != 0)
+		return -1;
+
+	ret = pdump_create_server_socket();
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to create server socket:%s:%d\n",
+			__func__, __LINE__);
+		return -1;
+	}
+
+	/* create the host thread to wait/handle pdump requests */
+	ret = pthread_create(&pdump_thread, NULL, pdump_thread_main, NULL);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to create the pdump thread:%s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		return -1;
+	}
+	/* Set thread_name for aid in debugging. */
+	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pdump-thread");
+	ret = rte_thread_setname(pdump_thread, thread_name);
+	if (ret != 0) {
+		RTE_LOG(DEBUG, PDUMP,
+			"Failed to set thread name for pdump handling\n");
+	}
+
+	return 0;
+}
+
+int
+rte_pdump_uninit(void)
+{
+	int ret;
+
+	ret = pthread_cancel(pdump_thread);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to cancel the pdump thread:%s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		return -1;
+	}
+
+	ret = close(pdump_socket_fd);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to close server socket: %s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		return -1;
+	}
+
+	struct sockaddr_un addr;
+
+	ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
+				RTE_PDUMP_SOCKET_SERVER);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to get server socket path: %s:%d\n",
+			__func__, __LINE__);
+		return -1;
+	}
+	ret = unlink(addr.sun_path);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to remove server socket addr: %s, %s:%d\n",
+			strerror(errno), __func__, __LINE__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+pdump_create_client_socket(struct pdump_request *p)
+{
+	int ret, socket_fd;
+	int pid;
+	int n;
+	struct pdump_response server_resp;
+	struct sockaddr_un addr, serv_addr, from;
+	socklen_t addr_len, serv_len;
+
+	pid = getpid();
+
+	socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, PDUMP,
+			"client socket(): %s:pid(%d):tid(%u), %s:%d\n",
+			strerror(errno), pid, rte_sys_gettid(),
+			__func__, __LINE__);
+		ret = errno;
+		return ret;
+	}
+
+	ret = pdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path),
+				RTE_PDUMP_SOCKET_CLIENT);
+	if (ret != 0) {
+		RTE_LOG(ERR, PDUMP,
+			"Failed to get client socket path: %s:%d\n",
+			__func__, __LINE__);
+		return -1;
+	}
+	addr.sun_family = AF_UNIX;
+	addr_len = sizeof(struct sockaddr_un);
+
+	do {
+		ret = bind(socket_fd, (struct sockaddr *) &addr, addr_len);
+		if (ret) {
+			RTE_LOG(ERR, PDUMP,
+				"client bind(): %s, %s:%d\n",
+				strerror(errno), __func__, __LINE__);
+			ret = errno;
+			break;
+		}
+
+		serv_len = sizeof(struct sockaddr_un);
+		memset(&serv_addr, 0, sizeof(serv_addr));
+		ret = pdump_get_socket_path(serv_addr.sun_path,
+					sizeof(serv_addr.sun_path),
+					RTE_PDUMP_SOCKET_SERVER);
+		if (ret != 0) {
+			RTE_LOG(ERR, PDUMP,
+				"Failed to get server socket path: %s:%d\n",
+				__func__, __LINE__);
+			break;
+		}
+		serv_addr.sun_family = AF_UNIX;
+
+		n =  sendto(socket_fd, p, sizeof(struct pdump_request), 0,
+				(struct sockaddr *)&serv_addr, serv_len);
+		if (n < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to send to server:%s, %s:%d\n",
+				strerror(errno), __func__, __LINE__);
+			ret =  errno;
+			break;
+		}
+
+		n = recvfrom(socket_fd, &server_resp,
+				sizeof(struct pdump_response), 0,
+				(struct sockaddr *)&from, &serv_len);
+		if (n < 0) {
+			RTE_LOG(ERR, PDUMP,
+				"failed to recv from server:%s, %s:%d\n",
+				strerror(errno), __func__, __LINE__);
+			ret = errno;
+			break;
+		}
+		ret = server_resp.err_value;
+	} while (0);
+
+	close(socket_fd);
+	unlink(addr.sun_path);
+	return ret;
+}
+
+static int
+pdump_validate_ring_mp(struct rte_ring *ring, struct rte_mempool *mp)
+{
+	if (ring == NULL || mp == NULL) {
+		RTE_LOG(ERR, PDUMP, "NULL ring or mempool are passed %s:%d\n",
+			__func__, __LINE__);
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (mp->flags & MEMPOOL_F_SP_PUT || mp->flags & MEMPOOL_F_SC_GET) {
+		RTE_LOG(ERR, PDUMP, "mempool with either SP or SC settings"
+		" is not valid for pdump, should have MP and MC settings\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	if (ring->prod.sp_enqueue || ring->cons.sc_dequeue) {
+		RTE_LOG(ERR, PDUMP, "ring with either SP or SC settings"
+		" is not valid for pdump, should have MP and MC settings\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+pdump_validate_flags(uint32_t flags)
+{
+	if (flags != RTE_PDUMP_FLAG_RX && flags != RTE_PDUMP_FLAG_TX &&
+		flags != RTE_PDUMP_FLAG_RXTX) {
+		RTE_LOG(ERR, PDUMP,
+			"invalid flags, should be either rx/tx/rxtx\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+pdump_validate_port(uint8_t port, char *name)
+{
+	int ret = 0;
+
+	if (port >= RTE_MAX_ETHPORTS) {
+		RTE_LOG(ERR, PDUMP, "Invalid port id %u, %s:%d\n", port,
+			__func__, __LINE__);
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	ret = rte_eth_dev_get_name_by_port(port, name);
+	if (ret < 0) {
+		RTE_LOG(ERR, PDUMP,
+			"port id to name mapping failed for port id=%u, %s:%d\n",
+			port, __func__, __LINE__);
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+pdump_prepare_client_request(char *device, uint16_t queue,
+				uint32_t flags,
+				uint16_t operation,
+				struct rte_ring *ring,
+				struct rte_mempool *mp,
+				void *filter)
+{
+	int ret;
+	struct pdump_request req = {.ver = 1,};
+
+	req.flags = flags;
+	req.op =  operation;
+	if ((operation & ENABLE) != 0) {
+		snprintf(req.data.en_v1.device, sizeof(req.data.en_v1.device),
+				"%s", device);
+		req.data.en_v1.queue = queue;
+		req.data.en_v1.ring = ring;
+		req.data.en_v1.mp = mp;
+		req.data.en_v1.filter = filter;
+	} else {
+		snprintf(req.data.dis_v1.device, sizeof(req.data.dis_v1.device),
+				"%s", device);
+		req.data.dis_v1.queue = queue;
+		req.data.dis_v1.ring = NULL;
+		req.data.dis_v1.mp = NULL;
+		req.data.dis_v1.filter = NULL;
+	}
+
+	ret = pdump_create_client_socket(&req);
+	if (ret < 0) {
+		RTE_LOG(ERR, PDUMP,
+			"client request for pdump enable/disable failed\n");
+		rte_errno = ret;
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags,
+			struct rte_ring *ring,
+			struct rte_mempool *mp,
+			void *filter)
+{
+
+	int ret = 0;
+	char name[DEVICE_ID_SIZE];
+
+	ret = pdump_validate_port(port, name);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_ring_mp(ring, mp);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+
+	ret = pdump_prepare_client_request(name, queue, flags,
+						ENABLE, ring, mp, filter);
+
+	return ret;
+}
+
+int
+rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
+				uint32_t flags,
+				struct rte_ring *ring,
+				struct rte_mempool *mp,
+				void *filter)
+{
+	int ret = 0;
+	char domBDF[DEVICE_ID_SIZE];
+
+	ret = pdump_validate_ring_mp(ring, mp);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+
+	if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
+		ret = pdump_prepare_client_request(domBDF, queue, flags,
+						ENABLE, ring, mp, filter);
+	else
+		ret = pdump_prepare_client_request(device_id, queue, flags,
+						ENABLE, ring, mp, filter);
+
+	return ret;
+}
+
+int
+rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags)
+{
+	int ret = 0;
+	char name[DEVICE_ID_SIZE];
+
+	ret = pdump_validate_port(port, name);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+
+	ret = pdump_prepare_client_request(name, queue, flags,
+						DISABLE, NULL, NULL, NULL);
+
+	return ret;
+}
+
+int
+rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
+				uint32_t flags)
+{
+	int ret = 0;
+	char domBDF[DEVICE_ID_SIZE];
+
+	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+
+	if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
+		ret = pdump_prepare_client_request(domBDF, queue, flags,
+						DISABLE, NULL, NULL, NULL);
+	else
+		ret = pdump_prepare_client_request(device_id, queue, flags,
+						DISABLE, NULL, NULL, NULL);
+
+	return ret;
+}
+
+int
+rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type)
+{
+	int ret, count;
+
+	if (path != NULL) {
+		if (type == RTE_PDUMP_SOCKET_SERVER) {
+			count = sizeof(server_socket_dir);
+			ret = snprintf(server_socket_dir, count, "%s", path);
+		} else {
+			count = sizeof(client_socket_dir);
+			ret = snprintf(client_socket_dir, count, "%s", path);
+		}
+
+		if (ret < 0  || ret >= count) {
+			RTE_LOG(ERR, PDUMP,
+					"Invalid socket path:%s:%d\n",
+					__func__, __LINE__);
+			if (type == RTE_PDUMP_SOCKET_SERVER)
+				server_socket_dir[0] = 0;
+			else
+				client_socket_dir[0] = 0;
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
new file mode 100644
index 00000000..b5f4e2f3
--- /dev/null
+++ b/lib/librte_pdump/rte_pdump.h
@@ -0,0 +1,216 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PDUMP_H_
+#define _RTE_PDUMP_H_
+
+/**
+ * @file
+ * RTE pdump
+ *
+ * packet dump library to provide packet capturing support on dpdk.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_PDUMP_ALL_QUEUES UINT16_MAX
+
+enum {
+	RTE_PDUMP_FLAG_RX = 1,  /* receive direction */
+	RTE_PDUMP_FLAG_TX = 2,  /* transmit direction */
+	/* both receive and transmit directions */
+	RTE_PDUMP_FLAG_RXTX = (RTE_PDUMP_FLAG_RX|RTE_PDUMP_FLAG_TX)
+};
+
+enum rte_pdump_socktype {
+	RTE_PDUMP_SOCKET_SERVER = 1,
+	RTE_PDUMP_SOCKET_CLIENT = 2
+};
+
+/**
+ * Initialize packet capturing handling
+ *
+ * Creates pthread and server socket for handling clients
+ * requests to enable/disable rxtx callbacks.
+ *
+ * @param path
+ * directory path for server socket.
+ *
+ * @return
+ *    0 on success, -1 on error
+ */
+int
+rte_pdump_init(const char *path);
+
+/**
+ * Un initialize packet capturing handling
+ *
+ * Cancels pthread, close server socket, removes server socket address.
+ *
+ * @return
+ *    0 on success, -1 on error
+ */
+int
+rte_pdump_uninit(void);
+
+/**
+ * Enables packet capturing on given port and queue.
+ *
+ * @param port
+ *  port on which packet capturing should be enabled.
+ * @param queue
+ *  queue of a given port on which packet capturing should be enabled.
+ *  users should pass on value UINT16_MAX to enable packet capturing on all
+ *  queues of a given port.
+ * @param flags
+ *  flags specifies RTE_PDUMP_FLAG_RX/RTE_PDUMP_FLAG_TX/RTE_PDUMP_FLAG_RXTX
+ *  on which packet capturing should be enabled for a given port and queue.
+ * @param ring
+ *  ring on which captured packets will be enqueued for user.
+ * @param mp
+ *  mempool on to which original packets will be mirrored or duplicated.
+ * @param filter
+ *  place holder for packet filtering.
+ *
+ * @return
+ *    0 on success, -1 on error, rte_errno is set accordingly.
+ */
+
+int
+rte_pdump_enable(uint8_t port, uint16_t queue, uint32_t flags,
+		struct rte_ring *ring,
+		struct rte_mempool *mp,
+		void *filter);
+
+/**
+ * Disables packet capturing on given port and queue.
+ *
+ * @param port
+ *  port on which packet capturing should be disabled.
+ * @param queue
+ *  queue of a given port on which packet capturing should be disabled.
+ *  users should pass on value UINT16_MAX to disable packet capturing on all
+ *  queues of a given port.
+ * @param flags
+ *  flags specifies RTE_PDUMP_FLAG_RX/RTE_PDUMP_FLAG_TX/RTE_PDUMP_FLAG_RXTX
+ *  on which packet capturing should be enabled for a given port and queue.
+ *
+ * @return
+ *    0 on success, -1 on error, rte_errno is set accordingly.
+ */
+
+int
+rte_pdump_disable(uint8_t port, uint16_t queue, uint32_t flags);
+
+/**
+ * Enables packet capturing on given device id and queue.
+ * device_id can be name or pci address of device.
+ *
+ * @param device_id
+ *  device id on which packet capturing should be enabled.
+ * @param queue
+ *  queue of a given device id on which packet capturing should be enabled.
+ *  users should pass on value UINT16_MAX to enable packet capturing on all
+ *  queues of a given device id.
+ * @param flags
+ *  flags specifies RTE_PDUMP_FLAG_RX/RTE_PDUMP_FLAG_TX/RTE_PDUMP_FLAG_RXTX
+ *  on which packet capturing should be enabled for a given port and queue.
+ * @param ring
+ *  ring on which captured packets will be enqueued for user.
+ * @param mp
+ *  mempool on to which original packets will be mirrored or duplicated.
+ * @param filter
+ *  place holder for packet filtering.
+ *
+ * @return
+ *    0 on success, -1 on error, rte_errno is set accordingly.
+ */
+
+int
+rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
+				uint32_t flags,
+				struct rte_ring *ring,
+				struct rte_mempool *mp,
+				void *filter);
+
+/**
+ * Disables packet capturing on given device_id and queue.
+ * device_id can be name or pci address of device.
+ *
+ * @param device_id
+ *  pci address or name of the device on which packet capturing
+ *  should be disabled.
+ * @param queue
+ *  queue of a given device on which packet capturing should be disabled.
+ *  users should pass on value UINT16_MAX to disable packet capturing on all
+ *  queues of a given device id.
+ * @param flags
+ *  flags specifies RTE_PDUMP_FLAG_RX/RTE_PDUMP_FLAG_TX/RTE_PDUMP_FLAG_RXTX
+ *  on which packet capturing should be enabled for a given port and queue.
+ *
+ * @return
+ *    0 on success, -1 on error, rte_errno is set accordingly.
+ */
+int
+rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
+				uint32_t flags);
+
+/**
+ * Allows applications to set server and client socket paths.
+ * If specified path is null default path will be selected, i.e.
+ *"/var/run/" for root user and "$HOME" for non root user.
+ * Clients also need to call this API to set their server path if the
+ * server path is different from default path.
+ * This API is not thread-safe.
+ *
+ * @param path
+ * directory path for server or client socket.
+ * @type
+ * specifies RTE_PDUMP_SOCKET_SERVER if socket path is for server.
+ * (or)
+ * specifies RTE_PDUMP_SOCKET_CLIENT if socket path is for client.
+ *
+ * @return
+ * 0 on success, -EINVAL on error
+ *
+ */
+int
+rte_pdump_set_socket_dir(const char *path, enum rte_pdump_socktype type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PDUMP_H_ */
diff --git a/lib/librte_pdump/rte_pdump_version.map b/lib/librte_pdump/rte_pdump_version.map
new file mode 100644
index 00000000..edec99a4
--- /dev/null
+++ b/lib/librte_pdump/rte_pdump_version.map
@@ -0,0 +1,13 @@
+DPDK_16.07 {
+	global:
+
+	rte_pdump_disable;
+	rte_pdump_disable_by_deviceid;
+	rte_pdump_enable;
+	rte_pdump_enable_by_deviceid;
+	rte_pdump_init;
+	rte_pdump_set_socket_dir;
+	rte_pdump_uninit;
+
+	local: *;
+};
diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile
index 822fd41c..05d64ff8 100644
--- a/lib/librte_pipeline/Makefile
+++ b/lib/librte_pipeline/Makefile
@@ -52,7 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c
 SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h
 
 # this lib depends upon:
-DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) := lib/librte_table
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_table
 DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_port
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
index 2c0ccbe5..3d84a0e4 100644
--- a/lib/librte_port/Makefile
+++ b/lib/librte_port/Makefile
@@ -44,7 +44,7 @@ CFLAGS += $(WERROR_FLAGS)
 
 EXPORT_MAP := rte_port_version.map
 
-LIBABIVER := 2
+LIBABIVER := 3
 
 #
 # all source are stored in SRCS-y
@@ -56,6 +56,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c
 endif
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c
+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c
+endif
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c
 
 # install includes
@@ -67,6 +70,9 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h
 endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h
+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h
+endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h
 
 # this lib depends upon:
@@ -75,5 +81,9 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mbuf
 DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mempool
 DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ip_frag
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_sched
+ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_kni
+endif
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_port/rte_port_kni.c b/lib/librte_port/rte_port_kni.c
new file mode 100644
index 00000000..08f4ac2a
--- /dev/null
+++ b/lib/librte_port/rte_port_kni.c
@@ -0,0 +1,545 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Ethan Zhuang <zhuangwj@gmail.com>.
+ *   Copyright(c) 2016 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_kni.h>
+
+#include "rte_port_kni.h"
+
+/*
+ * Port KNI Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_KNI_READER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_KNI_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_kni_reader {
+	struct rte_port_in_stats stats;
+
+	struct rte_kni *kni;
+};
+
+static void *
+rte_port_kni_reader_create(void *params, int socket_id)
+{
+	struct rte_port_kni_reader_params *conf =
+			(struct rte_port_kni_reader_params *) params;
+	struct rte_port_kni_reader *port;
+
+	/* Check input parameters */
+	if (conf == NULL) {
+		RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->kni = conf->kni;
+
+	return port;
+}
+
+static int
+rte_port_kni_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_kni_reader *p =
+			(struct rte_port_kni_reader *) port;
+	uint16_t rx_pkt_cnt;
+
+	rx_pkt_cnt = rte_kni_rx_burst(p->kni, pkts, n_pkts);
+	RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt);
+	return rx_pkt_cnt;
+}
+
+static int
+rte_port_kni_reader_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_kni_reader_stats_read(void *port,
+	struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_kni_reader *p =
+			(struct rte_port_kni_reader *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port KNI Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_kni_writer {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	uint64_t bsz_mask;
+	struct rte_kni *kni;
+};
+
+static void *
+rte_port_kni_writer_create(void *params, int socket_id)
+{
+	struct rte_port_kni_writer_params *conf =
+			(struct rte_port_kni_writer_params *) params;
+	struct rte_port_kni_writer *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->kni = conf->kni;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+	return port;
+}
+
+static inline void
+send_burst(struct rte_port_kni_writer *p)
+{
+	uint32_t nb_tx;
+
+	nb_tx = rte_kni_tx_burst(p->kni, p->tx_buf, p->tx_buf_count);
+
+	RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for (; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_kni_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_kni_writer *p =
+			(struct rte_port_kni_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_kni_writer *p =
+			(struct rte_port_kni_writer *) port;
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+					((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count)
+			send_burst(p);
+
+		RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+		n_pkts_ok = rte_kni_tx_burst(p->kni, pkts, n_pkts);
+
+		RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok);
+		for (; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+
+			rte_pktmbuf_free(pkt);
+		}
+	} else {
+		for (; pkts_mask;) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz)
+			send_burst(p);
+	}
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_flush(void *port)
+{
+	struct rte_port_kni_writer *p =
+			(struct rte_port_kni_writer *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_kni_writer_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_kni_writer_stats_read(void *port,
+	struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_kni_writer *p =
+			(struct rte_port_kni_writer *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port KNI Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+	port->stats.n_pkts_in += val
+#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+	port->stats.n_pkts_drop += val
+
+#else
+
+#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_kni_writer_nodrop {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint32_t tx_buf_count;
+	uint64_t bsz_mask;
+	uint64_t n_retries;
+	struct rte_kni *kni;
+};
+
+static void *
+rte_port_kni_writer_nodrop_create(void *params, int socket_id)
+{
+	struct rte_port_kni_writer_nodrop_params *conf =
+		(struct rte_port_kni_writer_nodrop_params *) params;
+	struct rte_port_kni_writer_nodrop *port;
+
+	/* Check input parameters */
+	if ((conf == NULL) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->kni = conf->kni;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+	port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+	/*
+	 * When n_retries is 0 it means that we should wait for every packet to
+	 * send no matter how many retries should it take. To limit number of
+	 * branches in fast path, we use UINT64_MAX instead of branching.
+	 */
+	port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries;
+
+	return port;
+}
+
+static inline void
+send_burst_nodrop(struct rte_port_kni_writer_nodrop *p)
+{
+	uint32_t nb_tx = 0, i;
+
+	nb_tx = rte_kni_tx_burst(p->kni, p->tx_buf, p->tx_buf_count);
+
+	/* We sent all the packets in a first try */
+	if (nb_tx >= p->tx_buf_count) {
+		p->tx_buf_count = 0;
+		return;
+	}
+
+	for (i = 0; i < p->n_retries; i++) {
+		nb_tx += rte_kni_tx_burst(p->kni,
+			p->tx_buf + nb_tx,
+			p->tx_buf_count - nb_tx);
+
+		/* We sent all the packets in more than one try */
+		if (nb_tx >= p->tx_buf_count) {
+			p->tx_buf_count = 0;
+			return;
+		}
+	}
+
+	/* We didn't send the packets in maximum allowed attempts */
+	RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx);
+	for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+		rte_pktmbuf_free(p->tx_buf[nb_tx]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_kni_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_kni_writer_nodrop *p =
+			(struct rte_port_kni_writer_nodrop *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_nodrop_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_kni_writer_nodrop *p =
+			(struct rte_port_kni_writer_nodrop *) port;
+
+	uint64_t bsz_mask = p->bsz_mask;
+	uint32_t tx_buf_count = p->tx_buf_count;
+	uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+					((pkts_mask & bsz_mask) ^ bsz_mask);
+
+	if (expr == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t n_pkts_ok;
+
+		if (tx_buf_count)
+			send_burst_nodrop(p);
+
+		RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+		n_pkts_ok = rte_kni_tx_burst(p->kni, pkts, n_pkts);
+
+		if (n_pkts_ok >= n_pkts)
+			return 0;
+
+		/*
+		 * If we didn't manage to send all packets in single burst, move
+		 * remaining packets to the buffer and call send burst.
+		 */
+		for (; n_pkts_ok < n_pkts; n_pkts_ok++) {
+			struct rte_mbuf *pkt = pkts[n_pkts_ok];
+			p->tx_buf[p->tx_buf_count++] = pkt;
+		}
+		send_burst_nodrop(p);
+	} else {
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+		p->tx_buf_count = tx_buf_count;
+		if (tx_buf_count >= p->tx_burst_sz)
+			send_burst_nodrop(p);
+	}
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_nodrop_flush(void *port)
+{
+	struct rte_port_kni_writer_nodrop *p =
+		(struct rte_port_kni_writer_nodrop *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+static int
+rte_port_kni_writer_nodrop_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_kni_writer_nodrop_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_kni_writer_nodrop_stats_read(void *port,
+	struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_kni_writer_nodrop *p =
+			(struct rte_port_kni_writer_nodrop *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_kni_reader_ops = {
+	.f_create = rte_port_kni_reader_create,
+	.f_free = rte_port_kni_reader_free,
+	.f_rx = rte_port_kni_reader_rx,
+	.f_stats = rte_port_kni_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_kni_writer_ops = {
+	.f_create = rte_port_kni_writer_create,
+	.f_free = rte_port_kni_writer_free,
+	.f_tx = rte_port_kni_writer_tx,
+	.f_tx_bulk = rte_port_kni_writer_tx_bulk,
+	.f_flush = rte_port_kni_writer_flush,
+	.f_stats = rte_port_kni_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_kni_writer_nodrop_ops = {
+	.f_create = rte_port_kni_writer_nodrop_create,
+	.f_free = rte_port_kni_writer_nodrop_free,
+	.f_tx = rte_port_kni_writer_nodrop_tx,
+	.f_tx_bulk = rte_port_kni_writer_nodrop_tx_bulk,
+	.f_flush = rte_port_kni_writer_nodrop_flush,
+	.f_stats = rte_port_kni_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_port/rte_port_kni.h
index 75fb57e5..4b60689c 100644
--- a/lib/librte_vhost/virtio-net.h
+++ b/lib/librte_port/rte_port_kni.h
@@ -1,7 +1,8 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016 Ethan Zhuang <zhuangwj@gmail.com>.
+ *   Copyright(c) 2016 Intel Corporation.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -31,13 +32,64 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _VIRTIO_NET_H
-#define _VIRTIO_NET_H
+#ifndef __INCLUDE_RTE_PORT_KNI_H__
+#define __INCLUDE_RTE_PORT_KNI_H__
 
-#include "vhost-net.h"
-#include "rte_virtio_net.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port KNI Interface
+ *
+ * kni_reader: input port built on top of pre-initialized KNI interface
+ * kni_writer: output port built on top of pre-initialized KNI interface
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_kni.h>
+
+#include "rte_port.h"
+
+/** kni_reader port parameters */
+struct rte_port_kni_reader_params {
+	/** KNI interface reference */
+	struct rte_kni *kni;
+};
 
-struct virtio_net_device_ops const *notify_ops;
-struct virtio_net *get_device(struct vhost_device_ctx ctx);
+/** kni_reader port operations */
+extern struct rte_port_in_ops rte_port_kni_reader_ops;
+
+
+/** kni_writer port parameters */
+struct rte_port_kni_writer_params {
+	/** KNI interface reference */
+	struct rte_kni *kni;
+	/** Burst size to KNI interface. */
+	uint32_t tx_burst_sz;
+};
+
+/** kni_writer port operations */
+extern struct rte_port_out_ops rte_port_kni_writer_ops;
+
+/** kni_writer_nodrop port parameters */
+struct rte_port_kni_writer_nodrop_params {
+	/** KNI interface reference */
+	struct rte_kni *kni;
+	/** Burst size to KNI interface. */
+	uint32_t tx_burst_sz;
+	/** Maximum number of retries, 0 for no limit */
+	uint32_t n_retries;
+};
+
+/** kni_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_kni_writer_nodrop_ops;
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif
diff --git a/lib/librte_port/rte_port_source_sink.c b/lib/librte_port/rte_port_source_sink.c
index 056c9756..4cad7109 100644
--- a/lib/librte_port/rte_port_source_sink.c
+++ b/lib/librte_port/rte_port_source_sink.c
@@ -38,17 +38,11 @@
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 
-#ifdef RTE_NEXT_ABI
-
 #ifdef RTE_PORT_PCAP
 #include <rte_ether.h>
 #include <pcap.h>
 #endif
 
-#else
-#undef RTE_PORT_PCAP
-#endif
-
 #include "rte_port_source_sink.h"
 
 /*
@@ -81,8 +75,6 @@ struct rte_port_source {
 	uint32_t pkt_index;
 };
 
-#ifdef RTE_NEXT_ABI
-
 #ifdef RTE_PORT_PCAP
 
 static int
@@ -232,8 +224,6 @@ error_exit:
 
 #endif /* RTE_PORT_PCAP */
 
-#endif /* RTE_NEXT_ABI */
-
 static void *
 rte_port_source_create(void *params, int socket_id)
 {
@@ -258,8 +248,6 @@ rte_port_source_create(void *params, int socket_id)
 	/* Initialization */
 	port->mempool = (struct rte_mempool *) p->mempool;
 
-#ifdef RTE_NEXT_ABI
-
 	if (p->file_name) {
 		int status = PCAP_SOURCE_LOAD(port, p->file_name,
 			p->n_bytes_per_pkt, socket_id);
@@ -270,8 +258,6 @@ rte_port_source_create(void *params, int socket_id)
 		}
 	}
 
-#endif
-
 	return port;
 }
 
diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h
index 917abe4f..4db8a8a8 100644
--- a/lib/librte_port/rte_port_source_sink.h
+++ b/lib/librte_port/rte_port_source_sink.h
@@ -53,7 +53,6 @@ extern "C" {
 struct rte_port_source_params {
 	/** Pre-initialized buffer pool */
 	struct rte_mempool *mempool;
-#ifdef RTE_NEXT_ABI
 
 	/** The full path of the pcap file to read packets from */
 	char *file_name;
@@ -62,8 +61,6 @@ struct rte_port_source_params {
 	 *  if it is bigger than packet size, the generated packets
 	 *  will contain the whole packet */
 	uint32_t n_bytes_per_pkt;
-
-#endif
 };
 
 /** source port operations */
diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map
index 7a0b34d0..048c20d7 100644
--- a/lib/librte_port/rte_port_version.map
+++ b/lib/librte_port/rte_port_version.map
@@ -5,10 +5,8 @@ DPDK_2.0 {
 	rte_port_ethdev_writer_ops;
 	rte_port_ring_reader_ipv4_frag_ops;
 	rte_port_ring_reader_ops;
-	rte_port_ring_reader_ops;
 	rte_port_ring_writer_ipv4_ras_ops;
 	rte_port_ring_writer_ops;
-	rte_port_ring_writer_ops;
 	rte_port_sched_reader_ops;
 	rte_port_sched_writer_ops;
 	rte_port_sink_ops;
@@ -35,3 +33,12 @@ DPDK_2.2 {
 	rte_port_ring_multi_writer_nodrop_ops;
 
 } DPDK_2.1;
+
+DPDK_16.07 {
+	global:
+
+	rte_port_kni_reader_ops;
+	rte_port_kni_writer_ops;
+	rte_port_kni_writer_nodrop_ops;
+
+} DPDK_2.2;
diff --git a/lib/librte_power/guest_channel.c b/lib/librte_power/guest_channel.c
index d6b6d0aa..85c92fab 100644
--- a/lib/librte_power/guest_channel.c
+++ b/lib/librte_power/guest_channel.c
@@ -103,8 +103,10 @@ guest_channel_host_connect(const char *path, unsigned lcore_id)
 	global_fds[lcore_id] = fd;
 	ret = guest_channel_send_msg(&pkt, lcore_id);
 	if (ret != 0) {
-		RTE_LOG(ERR, GUEST_CHANNEL, "Error on channel '%s' communications "
-				"test: %s\n", fd_path, strerror(ret));
+		RTE_LOG(ERR, GUEST_CHANNEL,
+				"Error on channel '%s' communications test: %s\n",
+				fd_path, ret > 0 ? strerror(ret) :
+				"channel not connected");
 		goto error;
 	}
 	RTE_LOG(INFO, GUEST_CHANNEL, "Channel '%s' is now connected\n", fd_path);
diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c
index 7bb2774c..a1badf34 100644
--- a/lib/librte_power/rte_power_kvm_vm.c
+++ b/lib/librte_power/rte_power_kvm_vm.c
@@ -106,7 +106,8 @@ send_msg(unsigned lcore_id, uint32_t scale_direction)
 	ret = guest_channel_send_msg(&pkt[lcore_id], lcore_id);
 	if (ret == 0)
 		return 1;
-	RTE_LOG(DEBUG, POWER, "Error sending message: %s\n", strerror(ret));
+	RTE_LOG(DEBUG, POWER, "Error sending message: %s\n",
+			ret > 0 ? strerror(ret) : "channel not connected");
 	return -1;
 }
 
diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile
index 0c01de16..0d111aad 100644
--- a/lib/librte_reorder/Makefile
+++ b/lib/librte_reorder/Makefile
@@ -49,6 +49,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_REORDER)-include := rte_reorder.h
 
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mempool
 DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_eal
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
index d80faf3b..ca0a1082 100644
--- a/lib/librte_ring/rte_ring.c
+++ b/lib/librte_ring/rte_ring.c
@@ -122,6 +122,8 @@ int
 rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
 	unsigned flags)
 {
+	int ret;
+
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_ring) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -140,7 +142,9 @@ rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
 
 	/* init the ring structure */
 	memset(r, 0, sizeof(*r));
-	snprintf(r->name, sizeof(r->name), "%s", name);
+	ret = snprintf(r->name, sizeof(r->name), "%s", name);
+	if (ret < 0 || ret >= (int)sizeof(r->name))
+		return -ENAMETOOLONG;
 	r->flags = flags;
 	r->prod.watermark = count;
 	r->prod.sp_enqueue = !!(flags & RING_F_SP_ENQ);
@@ -165,6 +169,7 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
 	ssize_t ring_size;
 	int mz_flags = 0;
 	struct rte_ring_list* ring_list = NULL;
+	int ret;
 
 	ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list);
 
@@ -174,6 +179,13 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
 		return NULL;
 	}
 
+	ret = snprintf(mz_name, sizeof(mz_name), "%s%s",
+		RTE_RING_MZ_PREFIX, name);
+	if (ret < 0 || ret >= (int)sizeof(mz_name)) {
+		rte_errno = ENAMETOOLONG;
+		return NULL;
+	}
+
 	te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
 	if (te == NULL) {
 		RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n");
@@ -181,8 +193,6 @@ rte_ring_create(const char *name, unsigned count, int socket_id,
 		return NULL;
 	}
 
-	snprintf(mz_name, sizeof(mz_name), "%s%s", RTE_RING_MZ_PREFIX, name);
-
 	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
 
 	/* reserve a memory zone for this ring. If we can't get rte_config or
diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
index a782cd1a..44cb780f 100644
--- a/lib/librte_sched/Makefile
+++ b/lib/librte_sched/Makefile
@@ -59,6 +59,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched
 SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h
 
 # this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_eal
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf
 DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer
 
diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c
index fdf40576..ade57d1f 100644
--- a/lib/librte_sched/rte_red.c
+++ b/lib/librte_sched/rte_red.c
@@ -77,7 +77,7 @@ __rte_red_init_tables(void)
 
 	scale = 1024.0;
 
-	RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == RTE_DIM(rte_red_log2_1_minus_Wq));
+	RTE_ASSERT(RTE_RED_WQ_LOG2_NUM == RTE_DIM(rte_red_log2_1_minus_Wq));
 
 	for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) {
 		double n = (double)i;
diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h
index 7f9ac901..ca122275 100644
--- a/lib/librte_sched/rte_red.h
+++ b/lib/librte_sched/rte_red.h
@@ -63,19 +63,6 @@ extern "C" {
 #define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT)
 #define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
 
-#ifdef RTE_RED_DEBUG
-
-#define RTE_RED_ASSERT(exp)                                      \
-if (!(exp)) {                                                    \
-	rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
-}
-
-#else
-
-#define RTE_RED_ASSERT(exp)                 do { } while(0)
-
-#endif /* RTE_RED_DEBUG */
-
 /**
  * Externs
  *
@@ -246,8 +233,8 @@ rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
 {
 	uint64_t time_diff = 0, m = 0;
 
-	RTE_RED_ASSERT(red_cfg != NULL);
-	RTE_RED_ASSERT(red != NULL);
+	RTE_ASSERT(red_cfg != NULL);
+	RTE_ASSERT(red != NULL);
 
 	red->count ++;
 
@@ -361,8 +348,8 @@ rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
 	struct rte_red *red,
 	const unsigned q)
 {
-	RTE_RED_ASSERT(red_cfg != NULL);
-	RTE_RED_ASSERT(red != NULL);
+	RTE_ASSERT(red_cfg != NULL);
+	RTE_ASSERT(red != NULL);
 
 	/**
 	* EWMA filter (Sally Floyd and Van Jacobson):
@@ -424,8 +411,8 @@ rte_red_enqueue(const struct rte_red_config *red_cfg,
 	const unsigned q,
 	const uint64_t time)
 {
-	RTE_RED_ASSERT(red_cfg != NULL);
-	RTE_RED_ASSERT(red != NULL);
+	RTE_ASSERT(red_cfg != NULL);
+	RTE_ASSERT(red != NULL);
 
 	if (q != 0) {
 		return rte_red_enqueue_nonempty(red_cfg, red, q);
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 1609ea87..86964234 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -1084,10 +1084,17 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex
 	s->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
+#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-					    uint32_t qindex,
-					    struct rte_mbuf *pkt, uint32_t red)
+						uint32_t qindex,
+						struct rte_mbuf *pkt, uint32_t red)
+#else
+static inline void
+rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
+						uint32_t qindex,
+						struct rte_mbuf *pkt, __rte_unused uint32_t red)
+#endif
 {
 	struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
 	uint32_t tc_index = (qindex >> 2) & 0x3;
@@ -1110,10 +1117,17 @@ rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex,
 	qe->stats.n_bytes += pkt_len;
 }
 
+#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port,
-					  uint32_t qindex,
-					  struct rte_mbuf *pkt, uint32_t red)
+						uint32_t qindex,
+						struct rte_mbuf *pkt, uint32_t red)
+#else
+static inline void
+rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port,
+						uint32_t qindex,
+						struct rte_mbuf *pkt, __rte_unused uint32_t red)
+#endif
 {
 	struct rte_sched_queue_extra *qe = port->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c
index c1eb8488..8f1f8ceb 100644
--- a/lib/librte_table/rte_table_acl.c
+++ b/lib/librte_table/rte_table_acl.c
@@ -236,8 +236,6 @@ rte_table_acl_build(struct rte_table_acl *acl, struct rte_acl_ctx **acl_ctx)
 		return -1;
 	}
 
-	rte_acl_dump(ctx);
-
 	*acl_ctx = ctx;
 	return 0;
 }
diff --git a/lib/librte_table/rte_table_lpm.c b/lib/librte_table/rte_table_lpm.c
index cdeb0f5a..598b79f5 100644
--- a/lib/librte_table/rte_table_lpm.c
+++ b/lib/librte_table/rte_table_lpm.c
@@ -44,7 +44,9 @@
 
 #include "rte_table_lpm.h"
 
-#define RTE_TABLE_LPM_MAX_NEXT_HOPS                        256
+#ifndef RTE_TABLE_LPM_MAX_NEXT_HOPS
+#define RTE_TABLE_LPM_MAX_NEXT_HOPS                        65536
+#endif
 
 #ifdef RTE_TABLE_STATS_COLLECT
 
@@ -74,7 +76,7 @@ struct rte_table_lpm {
 
 	/* Next Hop Table (NHT) */
 	uint32_t nht_users[RTE_TABLE_LPM_MAX_NEXT_HOPS];
-	uint32_t nht[0] __rte_cache_aligned;
+	uint8_t nht[0] __rte_cache_aligned;
 };
 
 static void *
@@ -188,7 +190,7 @@ nht_find_existing(struct rte_table_lpm *lpm, void *entry, uint32_t *pos)
 	uint32_t i;
 
 	for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) {
-		uint32_t *nht_entry = &lpm->nht[i * lpm->entry_size];
+		uint8_t *nht_entry = &lpm->nht[i * lpm->entry_size];
 
 		if ((lpm->nht_users[i] > 0) && (memcmp(nht_entry, entry,
 			lpm->entry_unique_size) == 0)) {
@@ -242,7 +244,7 @@ rte_table_lpm_entry_add(
 
 	/* Find existing or free NHT entry */
 	if (nht_find_existing(lpm, entry, &nht_pos) == 0) {
-		uint32_t *nht_entry;
+		uint8_t *nht_entry;
 
 		if (nht_find_free(lpm, &nht_pos) == 0) {
 			RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__);
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e33ff53e..538adb0b 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -36,7 +36,7 @@ LIB = librte_vhost.a
 
 EXPORT_MAP := rte_vhost_version.map
 
-LIBABIVER := 2
+LIBABIVER := 3
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
@@ -66,6 +66,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mempool
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_net
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 3d8709e5..5ceaa8a5 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -20,3 +20,13 @@ DPDK_2.1 {
 	rte_vhost_driver_unregister;
 
 } DPDK_2.0;
+
+DPDK_16.07 {
+	global:
+
+	rte_vhost_avail_entries;
+	rte_vhost_get_ifname;
+	rte_vhost_get_numa_node;
+	rte_vhost_get_queue_num;
+
+} DPDK_2.1;
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 600b20b4..9caa6221 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -51,125 +51,12 @@
 #include <rte_mempool.h>
 #include <rte_ether.h>
 
-struct rte_mbuf;
-
-#define VHOST_MEMORY_MAX_NREGIONS 8
-
-/* Used to indicate that the device is running on a data core */
-#define VIRTIO_DEV_RUNNING 1
-
-/* Backend value set by guest. */
-#define VIRTIO_DEV_STOPPED -1
-
+#define RTE_VHOST_USER_CLIENT		(1ULL << 0)
+#define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
 
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
-#define BUF_VECTOR_MAX 256
-
-/**
- * Structure contains buffer address, length and descriptor index
- * from vring to do scatter RX.
- */
-struct buf_vector {
-	uint64_t buf_addr;
-	uint32_t buf_len;
-	uint32_t desc_idx;
-};
-
-/**
- * Structure contains variables relevant to RX/TX virtqueues.
- */
-struct vhost_virtqueue {
-	struct vring_desc	*desc;			/**< Virtqueue descriptor ring. */
-	struct vring_avail	*avail;			/**< Virtqueue available ring. */
-	struct vring_used	*used;			/**< Virtqueue used ring. */
-	uint32_t		size;			/**< Size of descriptor ring. */
-	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
-	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
-	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
-	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
-#define VIRTIO_INVALID_EVENTFD		(-1)
-#define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
-	int			callfd;			/**< Used to notify the guest (trigger interrupt). */
-	int			kickfd;			/**< Currently unused as polling mode is enabled. */
-	int			enabled;
-	uint64_t		log_guest_addr;		/**< Physical address of used ring, for logging */
-	uint64_t		reserved[15];		/**< Reserve some spaces for future extension. */
-	struct buf_vector	buf_vec[BUF_VECTOR_MAX];	/**< for scatter RX. */
-} __rte_cache_aligned;
-
-/* Old kernels have no such macro defined */
-#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
- #define VIRTIO_NET_F_GUEST_ANNOUNCE 21
-#endif
-
-
-/*
- * Make an extra wrapper for VIRTIO_NET_F_MQ and
- * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are
- * introduced since kernel v3.8. This makes our
- * code buildable for older kernel.
- */
-#ifdef VIRTIO_NET_F_MQ
- #define VHOST_MAX_QUEUE_PAIRS	VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
- #define VHOST_SUPPORTS_MQ	(1ULL << VIRTIO_NET_F_MQ)
-#else
- #define VHOST_MAX_QUEUE_PAIRS	1
- #define VHOST_SUPPORTS_MQ	0
-#endif
-
-/*
- * Define virtio 1.0 for older kernels
- */
-#ifndef VIRTIO_F_VERSION_1
- #define VIRTIO_F_VERSION_1 32
-#endif
-
-/**
- * Device structure contains all configuration information relating to the device.
- */
-struct virtio_net {
-	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
-	uint64_t		features;	/**< Negotiated feature set. */
-	uint64_t		protocol_features;	/**< Negotiated protocol feature set. */
-	uint64_t		device_fh;	/**< device identifier. */
-	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
-#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
-	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
-	uint32_t		virt_qp_nb;	/**< number of queue pair we have allocated */
-	void			*priv;		/**< private context */
-	uint64_t		log_size;	/**< Size of log area */
-	uint64_t		log_base;	/**< Where dirty pages are logged */
-	struct ether_addr	mac;		/**< MAC address */
-	rte_atomic16_t		broadcast_rarp;	/**< A flag to tell if we need broadcast rarp packet */
-	uint64_t		reserved[61];	/**< Reserve some spaces for future extension. */
-	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];	/**< Contains all virtqueue information. */
-} __rte_cache_aligned;
-
-/**
- * Information relating to memory regions including offsets to addresses in QEMUs memory file.
- */
-struct virtio_memory_regions {
-	uint64_t	guest_phys_address;	/**< Base guest physical address of region. */
-	uint64_t	guest_phys_address_end;	/**< End guest physical address of region. */
-	uint64_t	memory_size;		/**< Size of region. */
-	uint64_t	userspace_address;	/**< Base userspace address of region. */
-	uint64_t	address_offset;		/**< Offset of region for address translation. */
-};
-
-
-/**
- * Memory structure includes region and mapping information.
- */
-struct virtio_memory {
-	uint64_t	base_address;	/**< Base QEMU userspace address of the memory file. */
-	uint64_t	mapped_address;	/**< Mapped address of memory file base in our applications memory space. */
-	uint64_t	mapped_size;	/**< Total size of memory file. */
-	uint32_t	nregions;	/**< Number of memory regions. */
-	struct virtio_memory_regions      regions[0]; /**< Memory region information. */
-};
-
 /**
  * Device and vring operations.
  *
@@ -178,45 +65,13 @@ struct virtio_memory {
  *
  */
 struct virtio_net_device_ops {
-	int (*new_device)(struct virtio_net *);	/**< Add device. */
-	void (*destroy_device)(volatile struct virtio_net *);	/**< Remove device. */
-
-	int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable);	/**< triggered when a vring is enabled or disabled */
-};
-
-static inline uint16_t __attribute__((always_inline))
-rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
-{
-	struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
-
-	if (!vq->enabled)
-		return 0;
+	int (*new_device)(int vid);		/**< Add device. */
+	void (*destroy_device)(int vid);	/**< Remove device. */
 
-	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
-}
-
-/**
- * Function to convert guest physical addresses to vhost virtual addresses.
- * This is used to convert guest virtio buffer addresses.
- */
-static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
-{
-	struct virtio_memory_regions *region;
-	uint32_t regionidx;
-	uint64_t vhost_va = 0;
-
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		region = &dev->mem->regions[regionidx];
-		if ((guest_pa >= region->guest_phys_address) &&
-			(guest_pa <= region->guest_phys_address_end)) {
-			vhost_va = region->address_offset + guest_pa;
-			break;
-		}
-	}
-	return vhost_va;
-}
+	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);	/**< triggered when a vring is enabled or disabled */
 
+	void *reserved[5]; /**< Reserved for future extension */
+};
 
 /**
  *  Disable features in feature_mask. Returns 0 on success.
@@ -231,13 +86,16 @@ int rte_vhost_feature_enable(uint64_t feature_mask);
 /* Returns currently supported vhost features */
 uint64_t rte_vhost_feature_get(void);
 
-int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable);
+int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
 
-/* Register vhost driver. dev_name could be different for multiple instance support. */
-int rte_vhost_driver_register(const char *dev_name);
+/**
+ * Register vhost driver. path could be different for multiple
+ * instance support.
+ */
+int rte_vhost_driver_register(const char *path, uint64_t flags);
 
 /* Unregister vhost driver. This is only meaningful to vhost user. */
-int rte_vhost_driver_unregister(const char *dev_name);
+int rte_vhost_driver_unregister(const char *path);
 
 /* Register callbacks. */
 int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
@@ -245,12 +103,65 @@ int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * cons
 int rte_vhost_driver_session_start(void);
 
 /**
+ * Get the numa node from which the virtio net device's memory
+ * is allocated.
+ *
+ * @param vid
+ *  virtio-net device ID
+ *
+ * @return
+ *  The numa node, -1 on failure
+ */
+int rte_vhost_get_numa_node(int vid);
+
+/**
+ * Get the number of queues the device supports.
+ *
+ * @param vid
+ *  virtio-net device ID
+ *
+ * @return
+ *  The number of queues, 0 on failure
+ */
+uint32_t rte_vhost_get_queue_num(int vid);
+
+/**
+ * Get the virtio net device's ifname. For vhost-cuse, ifname is the
+ * path of the char device. For vhost-user, ifname is the vhost-user
+ * socket file path.
+ *
+ * @param vid
+ *  virtio-net device ID
+ * @param buf
+ *  The buffer to stored the queried ifname
+ * @param len
+ *  The length of buf
+ *
+ * @return
+ *  0 on success, -1 on failure
+ */
+int rte_vhost_get_ifname(int vid, char *buf, size_t len);
+
+/**
+ * Get how many avail entries are left in the queue
+ *
+ * @param vid
+ *  virtio-net device ID
+ * @param queue_id
+ *  virtio queue index
+ *
+ * @return
+ *  num of avail entires left
+ */
+uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
+
+/**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtual device. A packet
  * count is returned to indicate the number of packets that were succesfully
  * added to the RX queue.
- * @param dev
- *  virtio-net device
+ * @param vid
+ *  virtio-net device ID
  * @param queue_id
  *  virtio queue index in mq case
  * @param pkts
@@ -260,14 +171,14 @@ int rte_vhost_driver_session_start(void);
  * @return
  *  num of packets enqueued
  */
-uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count);
 
 /**
  * This function gets guest buffers from the virtio device TX virtqueue,
  * construct host mbufs, copies guest buffer content to host mbufs and
  * store them in pkts to be processed.
- * @param dev
+ * @param vid
  *  virtio-net device
  * @param queue_id
  *  virtio queue index in mq case
@@ -280,7 +191,7 @@ uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
  * @return
  *  num of packets dequeued
  */
-uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
 
 #endif /* _VIRTIO_NET_H_ */
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index f193a1f6..38593a29 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -43,6 +43,128 @@
 
 #include "rte_virtio_net.h"
 
+/* Used to indicate that the device is running on a data core */
+#define VIRTIO_DEV_RUNNING 1
+
+/* Backend value set by guest. */
+#define VIRTIO_DEV_STOPPED -1
+
+#define BUF_VECTOR_MAX 256
+
+/**
+ * Structure contains buffer address, length and descriptor index
+ * from vring to do scatter RX.
+ */
+struct buf_vector {
+	uint64_t buf_addr;
+	uint32_t buf_len;
+	uint32_t desc_idx;
+};
+
+/**
+ * Structure contains variables relevant to RX/TX virtqueues.
+ */
+struct vhost_virtqueue {
+	struct vring_desc	*desc;
+	struct vring_avail	*avail;
+	struct vring_used	*used;
+	uint32_t		size;
+
+	/* Last index used on the available ring */
+	volatile uint16_t	last_used_idx;
+#define VIRTIO_INVALID_EVENTFD		(-1)
+#define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
+
+	/* Backend value to determine if device should started/stopped */
+	int			backend;
+	/* Used to notify the guest (trigger interrupt) */
+	int			callfd;
+	/* Currently unused as polling mode is enabled */
+	int			kickfd;
+	int			enabled;
+
+	/* Physical address of used ring, for logging */
+	uint64_t		log_guest_addr;
+} __rte_cache_aligned;
+
+/* Old kernels have no such macro defined */
+#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
+ #define VIRTIO_NET_F_GUEST_ANNOUNCE 21
+#endif
+
+
+/*
+ * Make an extra wrapper for VIRTIO_NET_F_MQ and
+ * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are
+ * introduced since kernel v3.8. This makes our
+ * code buildable for older kernel.
+ */
+#ifdef VIRTIO_NET_F_MQ
+ #define VHOST_MAX_QUEUE_PAIRS	VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
+ #define VHOST_SUPPORTS_MQ	(1ULL << VIRTIO_NET_F_MQ)
+#else
+ #define VHOST_MAX_QUEUE_PAIRS	1
+ #define VHOST_SUPPORTS_MQ	0
+#endif
+
+/*
+ * Define virtio 1.0 for older kernels
+ */
+#ifndef VIRTIO_F_VERSION_1
+ #define VIRTIO_F_VERSION_1 32
+#endif
+
+/**
+ * Device structure contains all configuration information relating
+ * to the device.
+ */
+struct virtio_net {
+	/* Frontend (QEMU) memory and memory region information */
+	struct virtio_memory	*mem;
+	uint64_t		features;
+	uint64_t		protocol_features;
+	int			vid;
+	uint32_t		flags;
+	uint16_t		vhost_hlen;
+	/* to tell if we need broadcast rarp packet */
+	rte_atomic16_t		broadcast_rarp;
+	uint32_t		virt_qp_nb;
+	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
+#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
+	char			ifname[IF_NAME_SZ];
+	uint64_t		log_size;
+	uint64_t		log_base;
+	uint64_t		log_addr;
+	struct ether_addr	mac;
+
+} __rte_cache_aligned;
+
+/**
+ * Information relating to memory regions including offsets to
+ * addresses in QEMUs memory file.
+ */
+struct virtio_memory_regions {
+	uint64_t guest_phys_address;
+	uint64_t guest_phys_address_end;
+	uint64_t memory_size;
+	uint64_t userspace_address;
+	uint64_t address_offset;
+};
+
+
+/**
+ * Memory structure includes region and mapping information.
+ */
+struct virtio_memory {
+	/* Base QEMU userspace address of the memory file. */
+	uint64_t base_address;
+	uint64_t mapped_address;
+	uint64_t mapped_size;
+	uint32_t nregions;
+	struct virtio_memory_regions regions[0];
+};
+
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
 #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
@@ -57,9 +179,9 @@
 	char packet[VHOST_MAX_PRINT_BUFF]; \
 	\
 	if ((header)) \
-		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Header size %d: ", (device->device_fh), (size)); \
+		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Header size %d: ", (device->vid), (size)); \
 	else \
-		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Packet size %d: ", (device->device_fh), (size)); \
+		snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%d) Packet size %d: ", (device->vid), (size)); \
 	for (index = 0; index < (size); index++) { \
 		snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
 			"%02hhx ", pkt_addr[index]); \
@@ -74,37 +196,51 @@
 #define PRINT_PACKET(device, addr, size, header) do {} while (0)
 #endif
 
-
-/*
- * Structure used to identify device context.
+/**
+ * Function to convert guest physical addresses to vhost virtual addresses.
+ * This is used to convert guest virtio buffer addresses.
  */
-struct vhost_device_ctx {
-	pid_t		pid;	/* PID of process calling the IOCTL. */
-	uint64_t	fh;	/* Populated with fi->fh to track the device index. */
-};
-
-int vhost_new_device(struct vhost_device_ctx);
-void vhost_destroy_device(struct vhost_device_ctx);
-
-void vhost_set_ifname(struct vhost_device_ctx,
-	const char *if_name, unsigned int if_len);
-
-int vhost_get_features(struct vhost_device_ctx, uint64_t *);
-int vhost_set_features(struct vhost_device_ctx, uint64_t *);
-
-int vhost_set_vring_num(struct vhost_device_ctx, struct vhost_vring_state *);
-int vhost_set_vring_addr(struct vhost_device_ctx, struct vhost_vring_addr *);
-int vhost_set_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
-int vhost_get_vring_base(struct vhost_device_ctx,
-	uint32_t, struct vhost_vring_state *);
-
-int vhost_set_vring_kick(struct vhost_device_ctx, struct vhost_vring_file *);
-int vhost_set_vring_call(struct vhost_device_ctx, struct vhost_vring_file *);
-
-int vhost_set_backend(struct vhost_device_ctx, struct vhost_vring_file *);
-
-int vhost_set_owner(struct vhost_device_ctx);
-int vhost_reset_owner(struct vhost_device_ctx);
+static inline uint64_t __attribute__((always_inline))
+gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+{
+	struct virtio_memory_regions *region;
+	uint32_t regionidx;
+	uint64_t vhost_va = 0;
+
+	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+		region = &dev->mem->regions[regionidx];
+		if ((guest_pa >= region->guest_phys_address) &&
+			(guest_pa <= region->guest_phys_address_end)) {
+			vhost_va = region->address_offset + guest_pa;
+			break;
+		}
+	}
+	return vhost_va;
+}
+
+struct virtio_net_device_ops const *notify_ops;
+struct virtio_net *get_device(int vid);
+
+int vhost_new_device(void);
+void vhost_destroy_device(int);
+
+void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+
+int vhost_get_features(int, uint64_t *);
+int vhost_set_features(int, uint64_t *);
+
+int vhost_set_vring_num(int, struct vhost_vring_state *);
+int vhost_set_vring_addr(int, struct vhost_vring_addr *);
+int vhost_set_vring_base(int, struct vhost_vring_state *);
+int vhost_get_vring_base(int, uint32_t, struct vhost_vring_state *);
+
+int vhost_set_vring_kick(int, struct vhost_vring_file *);
+int vhost_set_vring_call(int, struct vhost_vring_file *);
+
+int vhost_set_backend(int, struct vhost_vring_file *);
+
+int vhost_set_owner(int);
+int vhost_reset_owner(int);
 
 /*
  * Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index c613e68e..5d150116 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -60,17 +60,18 @@ static const char default_cdev[] = "vhost-net";
 static struct fuse_session *session;
 
 /*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
+ * Returns vhost_cuse_device_ctx from given fuse_req_t. The
+ * index is populated later when the device is added to the
+ * device linked list.
  */
-static struct vhost_device_ctx
+static struct vhost_cuse_device_ctx
 fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
 {
-	struct vhost_device_ctx ctx;
+	struct vhost_cuse_device_ctx ctx;
 	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
 
 	ctx.pid = req_ctx->pid;
-	ctx.fh = fi->fh;
+	ctx.vid = (int)fi->fh;
 
 	return ctx;
 }
@@ -82,19 +83,18 @@ fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
 static void
 vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
 {
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-	int err = 0;
+	int vid = 0;
 
-	err = vhost_new_device(ctx);
-	if (err == -1) {
+	vid = vhost_new_device();
+	if (vid == -1) {
 		fuse_reply_err(req, EPERM);
 		return;
 	}
 
-	fi->fh = err;
+	fi->fh = vid;
 
 	RTE_LOG(INFO, VHOST_CONFIG,
-		"(%"PRIu64") Device configuration started\n", fi->fh);
+		"(%d) device configuration started\n", vid);
 	fuse_reply_open(req, fi);
 }
 
@@ -105,19 +105,19 @@ static void
 vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
 {
 	int err = 0;
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
 
-	vhost_destroy_device(ctx);
-	RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+	vhost_destroy_device(ctx.vid);
+	RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", ctx.vid);
 	fuse_reply_err(req, err);
 }
 
 /*
  * Boilerplate code for CUSE IOCTL
- * Implicit arguments: ctx, req, result.
+ * Implicit arguments: vid, req, result.
  */
 #define VHOST_IOCTL(func) do {	\
-	result = (func)(ctx);	\
+	result = (func)(vid);	\
 	fuse_reply_ioctl(req, result, NULL, 0);	\
 } while (0)
 
@@ -134,41 +134,41 @@ vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
 
 /*
  * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ * Implicit arguments: vid, req, result, in_bufsz, in_buf.
  */
 #define VHOST_IOCTL_R(type, var, func) do {	\
 	if (!in_bufsz) {	\
 		VHOST_IOCTL_RETRY(sizeof(type), 0);\
 	} else {	\
 		(var) = *(const type*)in_buf;	\
-		result = func(ctx, &(var));	\
+		result = func(vid, &(var));	\
 		fuse_reply_ioctl(req, result, NULL, 0);\
 	}	\
 } while (0)
 
 /*
  * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: ctx, req, result, out_bufsz.
+ * Implicit arguments: vid, req, result, out_bufsz.
  */
 #define VHOST_IOCTL_W(type, var, func) do {	\
 	if (!out_bufsz) {	\
 		VHOST_IOCTL_RETRY(0, sizeof(type));\
 	} else {	\
-		result = (func)(ctx, &(var));\
+		result = (func)(vid, &(var));\
 		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
 	} \
 } while (0)
 
 /*
  * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ * Implicit arguments: vid, req, result, in_bufsz, in_buf.
  */
 #define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
 	if (!in_bufsz) {	\
 		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
 	} else {	\
 		(var1) = *(const type1*) (in_buf);	\
-		result = (func)(ctx, (var1), &(var2));	\
+		result = (func)(vid, (var1), &(var2));	\
 		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
 	}	\
 } while (0)
@@ -183,18 +183,19 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 		struct fuse_file_info *fi, __rte_unused unsigned flags,
 		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
 {
-	struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+	struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
 	struct vhost_vring_file file;
 	struct vhost_vring_state state;
 	struct vhost_vring_addr addr;
 	uint64_t features;
 	uint32_t index;
 	int result = 0;
+	int vid = ctx.vid;
 
 	switch (cmd) {
 	case VHOST_NET_SET_BACKEND:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+			"(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid);
 		if (!in_buf) {
 			VHOST_IOCTL_RETRY(sizeof(file), 0);
 			break;
@@ -206,32 +207,32 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 
 	case VHOST_GET_FEATURES:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+			"(%d) IOCTL: VHOST_GET_FEATURES\n", vid);
 		VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
 		break;
 
 	case VHOST_SET_FEATURES:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_FEATURES\n", vid);
 		VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
 		break;
 
 	case VHOST_RESET_OWNER:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+			"(%d) IOCTL: VHOST_RESET_OWNER\n", vid);
 		VHOST_IOCTL(vhost_reset_owner);
 		break;
 
 	case VHOST_SET_OWNER:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_OWNER\n", vid);
 		VHOST_IOCTL(vhost_set_owner);
 		break;
 
 	case VHOST_SET_MEM_TABLE:
 		/*TODO fix race condition.*/
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid);
 		static struct vhost_memory mem_temp;
 
 		switch (in_bufsz) {
@@ -264,28 +265,28 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 
 	case VHOST_SET_VRING_NUM:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid);
 		VHOST_IOCTL_R(struct vhost_vring_state, state,
 			vhost_set_vring_num);
 		break;
 
 	case VHOST_SET_VRING_BASE:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid);
 		VHOST_IOCTL_R(struct vhost_vring_state, state,
 			vhost_set_vring_base);
 		break;
 
 	case VHOST_GET_VRING_BASE:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+			"(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid);
 		VHOST_IOCTL_RW(uint32_t, index,
 			struct vhost_vring_state, state, vhost_get_vring_base);
 		break;
 
 	case VHOST_SET_VRING_ADDR:
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+			"(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid);
 		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
 			vhost_set_vring_addr);
 		break;
@@ -294,12 +295,10 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 	case VHOST_SET_VRING_CALL:
 		if (cmd == VHOST_SET_VRING_KICK)
 			LOG_DEBUG(VHOST_CONFIG,
-				"(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n",
-			ctx.fh);
+				"(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid);
 		else
 			LOG_DEBUG(VHOST_CONFIG,
-				"(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n",
-			ctx.fh);
+				"(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid);
 		if (!in_buf)
 			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
 		else {
@@ -315,10 +314,10 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 			}
 			file.fd = fd;
 			if (cmd == VHOST_SET_VRING_KICK) {
-				result = vhost_set_vring_kick(ctx, &file);
+				result = vhost_set_vring_kick(vid, &file);
 				fuse_reply_ioctl(req, result, NULL, 0);
 			} else {
-				result = vhost_set_vring_call(ctx, &file);
+				result = vhost_set_vring_call(vid, &file);
 				fuse_reply_ioctl(req, result, NULL, 0);
 			}
 		}
@@ -326,17 +325,17 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
 
 	default:
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+			"(%d) IOCTL: DOESN NOT EXIST\n", vid);
 		result = -1;
 		fuse_reply_ioctl(req, result, NULL, 0);
 	}
 
 	if (result < 0)
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+			"(%d) IOCTL: FAIL\n", vid);
 	else
 		LOG_DEBUG(VHOST_CONFIG,
-			"(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+			"(%d) IOCTL: SUCCESS\n", vid);
 }
 
 /*
@@ -353,7 +352,7 @@ static const struct cuse_lowlevel_ops vhost_net_ops = {
  * vhost_net_device_ops are also passed when the device is registered in app.
  */
 int
-rte_vhost_driver_register(const char *dev_name)
+rte_vhost_driver_register(const char *dev_name, uint64_t flags)
 {
 	struct cuse_info cuse_info;
 	char device_name[PATH_MAX] = "";
@@ -365,6 +364,12 @@ rte_vhost_driver_register(const char *dev_name)
 	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
 	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
 
+	if (flags) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"vhost-cuse does not support any flags so far\n");
+		return -1;
+	}
+
 	if (access(cuse_device_name, R_OK | W_OK) < 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"char device %s can't be accessed, maybe not exist\n",
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index a68a8bd4..552be7d4 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -54,7 +54,6 @@
 #include "rte_virtio_net.h"
 #include "vhost-net.h"
 #include "virtio-net-cdev.h"
-#include "virtio-net.h"
 #include "eventfd_copy.h"
 
 /* Line size for reading maps file. */
@@ -263,7 +262,7 @@ host_memory_map(pid_t pid, uint64_t addr,
 }
 
 int
-cuse_set_mem_table(struct vhost_device_ctx ctx,
+cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
 	const struct vhost_memory *mem_regions_addr, uint32_t nregions)
 {
 	uint64_t size = offsetof(struct vhost_memory, regions);
@@ -274,7 +273,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 	uint64_t base_address = 0, mapped_address, mapped_size;
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(ctx.vid);
 	if (dev == NULL)
 		return -1;
 
@@ -289,8 +288,8 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 		sizeof(struct virtio_memory_regions) * nregions);
 	if (dev->mem == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
-			dev->device_fh);
+			"(%d) failed to allocate memory for dev->mem\n",
+			dev->vid);
 		return -1;
 	}
 
@@ -379,7 +378,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
  * save it in the device structure.
  */
 static int
-get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid)
+get_ifname(int vid, int tap_fd, int pid)
 {
 	int fd_tap;
 	struct ifreq ifr;
@@ -393,33 +392,32 @@ get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int
 	ret = ioctl(fd_tap, TUNGETIFF, &ifr);
 
 	if (close(fd_tap) < 0)
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") fd close failed\n",
-			dev->device_fh);
+		RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid);
 
 	if (ret >= 0) {
 		ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
-		vhost_set_ifname(ctx, ifr.ifr_name, ifr_size);
+		vhost_set_ifname(vid, ifr.ifr_name, ifr_size);
 	} else
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") TUNGETIFF ioctl failed\n",
-			dev->device_fh);
+			"(%d) TUNGETIFF ioctl failed\n", vid);
 
 	return 0;
 }
 
-int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+int
+cuse_set_backend(struct vhost_cuse_device_ctx ctx,
+		 struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(ctx.vid);
 	if (dev == NULL)
 		return -1;
 
 	if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED)
-		get_ifname(ctx, dev, file->fd, ctx.pid);
+		get_ifname(ctx.vid, file->fd, ctx.pid);
 
-	return vhost_set_backend(ctx, file);
+	return vhost_set_backend(ctx.vid, file);
 }
 
 void
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
index eb6b0bab..3f67154b 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
@@ -38,11 +38,19 @@
 
 #include "vhost-net.h"
 
+/*
+ * Structure used to identify device context.
+ */
+struct vhost_cuse_device_ctx {
+	pid_t	pid;	/* PID of process calling the IOCTL. */
+	int	vid;	/* Virtio-net device ID */
+};
+
 int
-cuse_set_mem_table(struct vhost_device_ctx ctx,
+cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
 	const struct vhost_memory *mem_regions_addr, uint32_t nregions);
 
 int
-cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *);
+cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *);
 
 #endif
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 750821a4..15ca9562 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -126,10 +126,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 }
 
 static inline void
-copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr,
+copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
 		    struct virtio_net_hdr_mrg_rxbuf hdr)
 {
-	if (vq->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
+	if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
 		*(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
 	else
 		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
@@ -137,7 +137,7 @@ copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr,
 
 static inline int __attribute__((always_inline))
 copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx, uint32_t *copied)
+		  struct rte_mbuf *m, uint16_t desc_idx)
 {
 	uint32_t desc_avail, desc_offset;
 	uint32_t mbuf_avail, mbuf_offset;
@@ -147,21 +147,20 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 
 	desc = &vq->desc[desc_idx];
-	if (unlikely(desc->len < vq->vhost_hlen))
+	if (unlikely(desc->len < dev->vhost_hlen))
 		return -1;
 
 	desc_addr = gpa_to_vva(dev, desc->addr);
 	rte_prefetch0((void *)(uintptr_t)desc_addr);
 
 	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
-	vhost_log_write(dev, desc->addr, vq->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
 
-	desc_offset = vq->vhost_hlen;
-	desc_avail  = desc->len - vq->vhost_hlen;
+	desc_offset = dev->vhost_hlen;
+	desc_avail  = desc->len - dev->vhost_hlen;
 
-	*copied = rte_pktmbuf_pkt_len(m);
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
@@ -205,49 +204,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/*
- * As many data cores may want to access available buffers
- * they need to be reserved.
- */
-static inline uint32_t
-reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count,
-		  uint16_t *start, uint16_t *end)
-{
-	uint16_t res_start_idx;
-	uint16_t res_end_idx;
-	uint16_t avail_idx;
-	uint16_t free_entries;
-	int success;
-
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-
-again:
-	res_start_idx = vq->last_used_idx_res;
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-	free_entries = avail_idx - res_start_idx;
-	count = RTE_MIN(count, free_entries);
-	if (count == 0)
-		return 0;
-
-	res_end_idx = res_start_idx + count;
-
-	/*
-	 * update vq->last_used_idx_res atomically; try again if failed.
-	 *
-	 * TODO: Allow to disable cmpset if no concurrency in application.
-	 */
-	success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-				      res_start_idx, res_end_idx);
-	if (unlikely(!success))
-		goto again;
-
-	*start = res_start_idx;
-	*end   = res_end_idx;
-
-	return count;
-}
-
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -260,15 +216,15 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	      struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
-	uint16_t res_start_idx, res_end_idx;
+	uint16_t avail_idx, free_entries, start_idx;
 	uint16_t desc_indexes[MAX_PKT_BURST];
+	uint16_t used_idx;
 	uint32_t i;
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA,
-			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-			__func__, dev->device_fh, queue_id);
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
 		return 0;
 	}
 
@@ -276,38 +232,43 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
-	count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx);
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	start_idx = vq->last_used_idx;
+	free_entries = avail_idx - start_idx;
+	count = RTE_MIN(count, free_entries);
+	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
 	if (count == 0)
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA,
-		"(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n",
-		dev->device_fh, res_start_idx, res_end_idx);
+	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
+		dev->vid, start_idx, start_idx + count);
 
 	/* Retrieve all of the desc indexes first to avoid caching issues. */
-	rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
+	rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
 	for (i = 0; i < count; i++) {
-		desc_indexes[i] = vq->avail->ring[(res_start_idx + i) &
-						  (vq->size - 1)];
+		used_idx = (start_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[used_idx];
+		vq->used->ring[used_idx].id = desc_indexes[i];
+		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
+					       dev->vhost_hlen;
+		vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
 	}
 
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
 		uint16_t desc_idx = desc_indexes[i];
-		uint16_t used_idx = (res_start_idx + i) & (vq->size - 1);
-		uint32_t copied;
 		int err;
 
-		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx, &copied);
-
-		vq->used->ring[used_idx].id = desc_idx;
-		if (unlikely(err))
-			vq->used->ring[used_idx].len = vq->vhost_hlen;
-		else
-			vq->used->ring[used_idx].len = copied + vq->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
+		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
+		if (unlikely(err)) {
+			used_idx = (start_idx + i) & (vq->size - 1);
+			vq->used->ring[used_idx].len = dev->vhost_hlen;
+			vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
+		}
 
 		if (i + 1 < count)
 			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
@@ -315,12 +276,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	rte_smp_wmb();
 
-	/* Wait until it's our turn to add our buffer to the used ring. */
-	while (unlikely(vq->last_used_idx != res_start_idx))
-		rte_pause();
-
 	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx = res_end_idx;
+	vq->last_used_idx += count;
 	vhost_log_used_vring(dev, vq,
 		offsetof(struct vring_used, idx),
 		sizeof(vq->used->idx));
@@ -337,7 +294,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 static inline int
 fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
-	     uint32_t *allocated, uint32_t *vec_idx)
+	     uint32_t *allocated, uint32_t *vec_idx,
+	     struct buf_vector *buf_vec)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
 	uint32_t vec_id = *vec_idx;
@@ -348,9 +306,9 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
 			return -1;
 
 		len += vq->desc[idx].len;
-		vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
-		vq->buf_vec[vec_id].buf_len  = vq->desc[idx].len;
-		vq->buf_vec[vec_id].desc_idx = idx;
+		buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
+		buf_vec[vec_id].buf_len  = vq->desc[idx].len;
+		buf_vec[vec_id].desc_idx = idx;
 		vec_id++;
 
 		if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
@@ -366,39 +324,30 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
 }
 
 /*
- * As many data cores may want to access available buffers concurrently,
- * they need to be reserved.
- *
  * Returns -1 on fail, 0 on success
  */
 static inline int
 reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *start, uint16_t *end)
+			    uint16_t *end, struct buf_vector *buf_vec)
 {
-	uint16_t res_start_idx;
-	uint16_t res_cur_idx;
+	uint16_t cur_idx;
 	uint16_t avail_idx;
-	uint32_t allocated;
-	uint32_t vec_idx;
-	uint16_t tries;
+	uint32_t allocated = 0;
+	uint32_t vec_idx = 0;
+	uint16_t tries = 0;
 
-again:
-	res_start_idx = vq->last_used_idx_res;
-	res_cur_idx  = res_start_idx;
+	cur_idx  = vq->last_used_idx;
 
-	allocated = 0;
-	vec_idx   = 0;
-	tries     = 0;
 	while (1) {
 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-		if (unlikely(res_cur_idx == avail_idx))
+		if (unlikely(cur_idx == avail_idx))
 			return -1;
 
-		if (unlikely(fill_vec_buf(vq, res_cur_idx, &allocated,
-					  &vec_idx) < 0))
+		if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
+					  &vec_idx, buf_vec) < 0))
 			return -1;
 
-		res_cur_idx++;
+		cur_idx++;
 		tries++;
 
 		if (allocated >= size)
@@ -413,27 +362,19 @@ again:
 			return -1;
 	}
 
-	/*
-	 * update vq->last_used_idx_res atomically.
-	 * retry again if failed.
-	 */
-	if (rte_atomic16_cmpset(&vq->last_used_idx_res,
-				res_start_idx, res_cur_idx) == 0)
-		goto again;
-
-	*start = res_start_idx;
-	*end   = res_cur_idx;
+	*end = cur_idx;
 	return 0;
 }
 
 static inline uint32_t __attribute__((always_inline))
 copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t res_start_idx, uint16_t res_end_idx,
-			    struct rte_mbuf *m)
+			    uint16_t end_idx, struct rte_mbuf *m,
+			    struct buf_vector *buf_vec)
 {
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint32_t vec_idx = 0;
-	uint16_t cur_idx = res_start_idx;
+	uint16_t start_idx = vq->last_used_idx;
+	uint16_t cur_idx = start_idx;
 	uint64_t desc_addr;
 	uint32_t mbuf_offset, mbuf_avail;
 	uint32_t desc_offset, desc_avail;
@@ -443,34 +384,33 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	if (unlikely(m == NULL))
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA,
-		"(%"PRIu64") Current Index %d| End Index %d\n",
-		dev->device_fh, cur_idx, res_end_idx);
+	LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+		dev->vid, cur_idx, end_idx);
 
-	if (vq->buf_vec[vec_idx].buf_len < vq->vhost_hlen)
+	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen)
 		return -1;
 
-	desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+	desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
 	rte_prefetch0((void *)(uintptr_t)desc_addr);
 
-	virtio_hdr.num_buffers = res_end_idx - res_start_idx;
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n",
-		dev->device_fh, virtio_hdr.num_buffers);
+	virtio_hdr.num_buffers = end_idx - start_idx;
+	LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
+		dev->vid, virtio_hdr.num_buffers);
 
 	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(vq, desc_addr, virtio_hdr);
-	vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0);
+	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+	vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
+	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
 
-	desc_avail  = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen;
-	desc_offset = vq->vhost_hlen;
+	desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
+	desc_offset = dev->vhost_hlen;
 
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
 		/* done with current desc buf, get the next one */
 		if (desc_avail == 0) {
-			desc_idx = vq->buf_vec[vec_idx].desc_idx;
+			desc_idx = buf_vec[vec_idx].desc_idx;
 
 			if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
 				/* Update used ring with desc information */
@@ -484,12 +424,12 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			}
 
 			vec_idx++;
-			desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+			desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
 
 			/* Prefetch buffer address. */
 			rte_prefetch0((void *)(uintptr_t)desc_addr);
 			desc_offset = 0;
-			desc_avail  = vq->buf_vec[vec_idx].buf_len;
+			desc_avail  = buf_vec[vec_idx].buf_len;
 		}
 
 		/* done with current mbuf, get the next one */
@@ -504,7 +444,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
 			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 			cpy_len);
-		vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr + desc_offset,
+		vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
 			cpy_len);
 		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 			cpy_len, 0);
@@ -516,13 +456,13 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	}
 
 	used_idx = cur_idx & (vq->size - 1);
-	vq->used->ring[used_idx].id = vq->buf_vec[vec_idx].desc_idx;
+	vq->used->ring[used_idx].id = buf_vec[vec_idx].desc_idx;
 	vq->used->ring[used_idx].len = desc_offset;
 	vhost_log_used_vring(dev, vq,
 		offsetof(struct vring_used, ring[used_idx]),
 		sizeof(vq->used->ring[used_idx]));
 
-	return res_end_idx - res_start_idx;
+	return end_idx - start_idx;
 }
 
 static inline uint32_t __attribute__((always_inline))
@@ -531,14 +471,13 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t start, end;
+	uint16_t end;
+	struct buf_vector buf_vec[BUF_VECTOR_MAX];
 
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
-		dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA,
-			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-			__func__, dev->device_fh, queue_id);
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
 		return 0;
 	}
 
@@ -551,27 +490,20 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		return 0;
 
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
+		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
 		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &start, &end) < 0)) {
+							 &end, buf_vec) < 0)) {
 			LOG_DEBUG(VHOST_DATA,
-				"(%" PRIu64 ") Failed to get enough desc from vring\n",
-				dev->device_fh);
+				"(%d) failed to get enough desc from vring\n",
+				dev->vid);
 			break;
 		}
 
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end,
-						      pkts[pkt_idx]);
+		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
+						      pkts[pkt_idx], buf_vec);
 		rte_smp_wmb();
 
-		/*
-		 * Wait until it's our turn to add our buffer
-		 * to the used ring.
-		 */
-		while (unlikely(vq->last_used_idx != start))
-			rte_pause();
-
 		*(volatile uint16_t *)&vq->used->idx += nr_used;
 		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
@@ -592,9 +524,14 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 }
 
 uint16_t
-rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint16_t count)
 {
+	struct virtio_net *dev = get_device(vid);
+
+	if (!dev)
+		return 0;
+
 	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
 		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
 	else
@@ -747,22 +684,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint32_t nr_desc = 1;
 
 	desc = &vq->desc[desc_idx];
-	if (unlikely(desc->len < vq->vhost_hlen))
+	if (unlikely(desc->len < dev->vhost_hlen))
 		return -1;
 
 	desc_addr = gpa_to_vva(dev, desc->addr);
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	/* Retrieve virtio net header */
 	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
-	desc_avail  = desc->len - vq->vhost_hlen;
-	desc_offset = vq->vhost_hlen;
+	rte_prefetch0(hdr);
+
+	/*
+	 * A virtio driver normally uses at least 2 desc buffers
+	 * for Tx: the first for storing the header, and others
+	 * for storing the data.
+	 */
+	if (likely((desc->len == dev->vhost_hlen) &&
+		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+		desc = &vq->desc[desc->next];
+
+		desc_addr = gpa_to_vva(dev, desc->addr);
+		rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+		desc_offset = 0;
+		desc_avail  = desc->len;
+		nr_desc    += 1;
+
+		PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+	} else {
+		desc_avail  = desc->len - dev->vhost_hlen;
+		desc_offset = dev->vhost_hlen;
+	}
 
 	mbuf_offset = 0;
 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
-	while (desc_avail != 0 || (desc->flags & VRING_DESC_F_NEXT) != 0) {
+	while (1) {
+		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
+			(void *)((uintptr_t)(desc_addr + desc_offset)),
+			cpy_len);
+
+		mbuf_avail  -= cpy_len;
+		mbuf_offset += cpy_len;
+		desc_avail  -= cpy_len;
+		desc_offset += cpy_len;
+
 		/* This desc reaches to its end, get the next one */
 		if (desc_avail == 0) {
+			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+				break;
+
 			if (unlikely(desc->next >= vq->size ||
 				     ++nr_desc >= vq->size))
 				return -1;
@@ -798,16 +766,6 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			mbuf_offset = 0;
 			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
 		}
-
-		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
-			(void *)((uintptr_t)(desc_addr + desc_offset)),
-			cpy_len);
-
-		mbuf_avail  -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
 	}
 
 	prev->data_len = mbuf_offset;
@@ -820,9 +778,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 }
 
 uint16_t
-rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
+	struct virtio_net *dev;
 	struct rte_mbuf *rarp_mbuf = NULL;
 	struct vhost_virtqueue *vq;
 	uint32_t desc_indexes[MAX_PKT_BURST];
@@ -831,10 +790,13 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	uint16_t free_entries;
 	uint16_t avail_idx;
 
+	dev = get_device(vid);
+	if (!dev)
+		return 0;
+
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA,
-			"%s (%"PRIu64"): virtqueue idx:%d invalid.\n",
-			__func__, dev->device_fh, queue_id);
+		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+			dev->vid, __func__, queue_id);
 		return 0;
 	}
 
@@ -870,35 +832,37 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 	if (free_entries == 0)
 		goto out;
 
-	LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh);
+	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
 	/* Prefetch available ring to retrieve head indexes. */
 	used_idx = vq->last_used_idx & (vq->size - 1);
 	rte_prefetch0(&vq->avail->ring[used_idx]);
+	rte_prefetch0(&vq->used->ring[used_idx]);
 
 	count = RTE_MIN(count, MAX_PKT_BURST);
 	count = RTE_MIN(count, free_entries);
-	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n",
-			dev->device_fh, count);
+	LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+			dev->vid, count);
 
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < count; i++) {
-		desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) &
-					(vq->size - 1)];
+		used_idx = (vq->last_used_idx + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[used_idx];
+
+		vq->used->ring[used_idx].id  = desc_indexes[i];
+		vq->used->ring[used_idx].len = 0;
+		vhost_log_used_vring(dev, vq,
+				offsetof(struct vring_used, ring[used_idx]),
+				sizeof(vq->used->ring[used_idx]));
 	}
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-	rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
-
 	for (i = 0; i < count; i++) {
 		int err;
 
-		if (likely(i + 1 < count)) {
+		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-			rte_prefetch0(&vq->used->ring[(used_idx + 1) &
-						      (vq->size - 1)]);
-		}
 
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
@@ -912,18 +876,12 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 			rte_pktmbuf_free(pkts[i]);
 			break;
 		}
-
-		used_idx = vq->last_used_idx++ & (vq->size - 1);
-		vq->used->ring[used_idx].id  = desc_indexes[i];
-		vq->used->ring[used_idx].len = 0;
-		vhost_log_used_vring(dev, vq,
-				offsetof(struct vring_used, ring[used_idx]),
-				sizeof(vq->used->ring[used_idx]));
 	}
 
 	rte_smp_wmb();
 	rte_smp_rmb();
 	vq->used->idx += i;
+	vq->last_used_idx += i;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
 
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index df2bd648..94f1b923 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -33,6 +33,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 #include <limits.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -40,6 +41,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include <sys/queue.h>
 #include <errno.h>
 #include <pthread.h>
 
@@ -51,32 +53,44 @@
 #include "vhost-net.h"
 #include "virtio-net-user.h"
 
-#define MAX_VIRTIO_BACKLOG 128
-
-static void vserver_new_vq_conn(int fd, void *data, int *remove);
-static void vserver_message_handler(int fd, void *dat, int *remove);
+/*
+ * Every time rte_vhost_driver_register() is invoked, an associated
+ * vhost_user_socket struct will be created.
+ */
+struct vhost_user_socket {
+	char *path;
+	int listenfd;
+	bool is_server;
+	bool reconnect;
+};
 
-struct connfd_ctx {
-	struct vhost_server *vserver;
-	uint32_t fh;
+struct vhost_user_connection {
+	struct vhost_user_socket *vsocket;
+	int vid;
 };
 
-#define MAX_VHOST_SERVER 1024
-struct _vhost_server {
-	struct vhost_server *server[MAX_VHOST_SERVER];
+#define MAX_VHOST_SOCKET 1024
+struct vhost_user {
+	struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
 	struct fdset fdset;
-	int vserver_cnt;
-	pthread_mutex_t server_mutex;
+	int vsocket_cnt;
+	pthread_mutex_t mutex;
 };
 
-static struct _vhost_server g_vhost_server = {
+#define MAX_VIRTIO_BACKLOG 128
+
+static void vhost_user_server_new_connection(int fd, void *data, int *remove);
+static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static int vhost_user_create_client(struct vhost_user_socket *vsocket);
+
+static struct vhost_user vhost_user = {
 	.fdset = {
 		.fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
 		.fd_mutex = PTHREAD_MUTEX_INITIALIZER,
 		.num = 0
 	},
-	.vserver_cnt = 0,
-	.server_mutex = PTHREAD_MUTEX_INITIALIZER,
+	.vsocket_cnt = 0,
+	.mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 
 static const char *vhost_message_str[VHOST_USER_MAX] = {
@@ -102,48 +116,6 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
 	[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
 };
 
-/**
- * Create a unix domain socket, bind to path and listen for connection.
- * @return
- *  socket fd or -1 on failure
- */
-static int
-uds_socket(const char *path)
-{
-	struct sockaddr_un un;
-	int sockfd;
-	int ret;
-
-	if (path == NULL)
-		return -1;
-
-	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (sockfd < 0)
-		return -1;
-	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
-
-	memset(&un, 0, sizeof(un));
-	un.sun_family = AF_UNIX;
-	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
-	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
-	if (ret == -1) {
-		RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n",
-			sockfd, path);
-		goto err;
-	}
-	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
-
-	ret = listen(sockfd, MAX_VIRTIO_BACKLOG);
-	if (ret == -1)
-		goto err;
-
-	return sockfd;
-
-err:
-	close(sockfd);
-	return -1;
-}
-
 /* return bytes# of read on success or negative val on failure. */
 static int
 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
@@ -278,62 +250,66 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
 	return ret;
 }
 
-/* call back when there is new virtio connection.  */
+
 static void
-vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove)
+vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 {
-	struct vhost_server *vserver = (struct vhost_server *)dat;
-	int conn_fd;
-	struct connfd_ctx *ctx;
-	int fh;
-	struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 };
-	unsigned int size;
-
-	conn_fd = accept(fd, NULL, NULL);
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"new virtio connection is %d\n", conn_fd);
-	if (conn_fd < 0)
-		return;
+	int vid;
+	size_t size;
+	struct vhost_user_connection *conn;
 
-	ctx = calloc(1, sizeof(*ctx));
-	if (ctx == NULL) {
-		close(conn_fd);
+	conn = malloc(sizeof(*conn));
+	if (conn == NULL) {
+		close(fd);
 		return;
 	}
 
-	fh = vhost_new_device(vdev_ctx);
-	if (fh == -1) {
-		free(ctx);
-		close(conn_fd);
+	vid = vhost_new_device();
+	if (vid == -1) {
+		close(fd);
+		free(conn);
 		return;
 	}
 
-	vdev_ctx.fh = fh;
-	size = strnlen(vserver->path, PATH_MAX);
-	vhost_set_ifname(vdev_ctx, vserver->path,
-		size);
+	size = strnlen(vsocket->path, PATH_MAX);
+	vhost_set_ifname(vid, vsocket->path, size);
+
+	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
 
-	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+	conn->vsocket = vsocket;
+	conn->vid = vid;
+	fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler, NULL, conn);
+}
+
+/* call back when there is new vhost-user connection from client  */
+static void
+vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
+{
+	struct vhost_user_socket *vsocket = dat;
 
-	ctx->vserver = vserver;
-	ctx->fh = fh;
-	fdset_add(&g_vhost_server.fdset,
-		conn_fd, vserver_message_handler, NULL, ctx);
+	fd = accept(fd, NULL, NULL);
+	if (fd < 0)
+		return;
+
+	RTE_LOG(INFO, VHOST_CONFIG, "new vhost user connection is %d\n", fd);
+	vhost_user_add_connection(fd, vsocket);
 }
 
 /* callback when there is message on the connfd */
 static void
-vserver_message_handler(int connfd, void *dat, int *remove)
+vhost_user_msg_handler(int connfd, void *dat, int *remove)
 {
-	struct vhost_device_ctx ctx;
-	struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
+	int vid;
+	struct vhost_user_connection *conn = dat;
 	struct VhostUserMsg msg;
 	uint64_t features;
 	int ret;
 
-	ctx.fh = cfd_ctx->fh;
+	vid = conn->vid;
 	ret = read_vhost_message(connfd, &msg);
 	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+		struct vhost_user_socket *vsocket = conn->vsocket;
+
 		if (ret < 0)
 			RTE_LOG(ERR, VHOST_CONFIG,
 				"vhost read message failed\n");
@@ -346,8 +322,11 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 
 		close(connfd);
 		*remove = 1;
-		free(cfd_ctx);
-		vhost_destroy_device(ctx);
+		free(conn);
+		vhost_destroy_device(vid);
+
+		if (vsocket->reconnect)
+			vhost_user_create_client(vsocket);
 
 		return;
 	}
@@ -356,14 +335,14 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 		vhost_message_str[msg.request]);
 	switch (msg.request) {
 	case VHOST_USER_GET_FEATURES:
-		ret = vhost_get_features(ctx, &features);
+		ret = vhost_get_features(vid, &features);
 		msg.payload.u64 = features;
 		msg.size = sizeof(msg.payload.u64);
 		send_vhost_message(connfd, &msg);
 		break;
 	case VHOST_USER_SET_FEATURES:
 		features = msg.payload.u64;
-		vhost_set_features(ctx, &features);
+		vhost_set_features(vid, &features);
 		break;
 
 	case VHOST_USER_GET_PROTOCOL_FEATURES:
@@ -372,22 +351,22 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 		send_vhost_message(connfd, &msg);
 		break;
 	case VHOST_USER_SET_PROTOCOL_FEATURES:
-		user_set_protocol_features(ctx, msg.payload.u64);
+		user_set_protocol_features(vid, msg.payload.u64);
 		break;
 
 	case VHOST_USER_SET_OWNER:
-		vhost_set_owner(ctx);
+		vhost_set_owner(vid);
 		break;
 	case VHOST_USER_RESET_OWNER:
-		vhost_reset_owner(ctx);
+		vhost_reset_owner(vid);
 		break;
 
 	case VHOST_USER_SET_MEM_TABLE:
-		user_set_mem_table(ctx, &msg);
+		user_set_mem_table(vid, &msg);
 		break;
 
 	case VHOST_USER_SET_LOG_BASE:
-		user_set_log_base(ctx, &msg);
+		user_set_log_base(vid, &msg);
 
 		/* it needs a reply */
 		msg.size = sizeof(msg.payload.u64);
@@ -399,26 +378,26 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 		break;
 
 	case VHOST_USER_SET_VRING_NUM:
-		vhost_set_vring_num(ctx, &msg.payload.state);
+		vhost_set_vring_num(vid, &msg.payload.state);
 		break;
 	case VHOST_USER_SET_VRING_ADDR:
-		vhost_set_vring_addr(ctx, &msg.payload.addr);
+		vhost_set_vring_addr(vid, &msg.payload.addr);
 		break;
 	case VHOST_USER_SET_VRING_BASE:
-		vhost_set_vring_base(ctx, &msg.payload.state);
+		vhost_set_vring_base(vid, &msg.payload.state);
 		break;
 
 	case VHOST_USER_GET_VRING_BASE:
-		ret = user_get_vring_base(ctx, &msg.payload.state);
+		ret = user_get_vring_base(vid, &msg.payload.state);
 		msg.size = sizeof(msg.payload.state);
 		send_vhost_message(connfd, &msg);
 		break;
 
 	case VHOST_USER_SET_VRING_KICK:
-		user_set_vring_kick(ctx, &msg);
+		user_set_vring_kick(vid, &msg);
 		break;
 	case VHOST_USER_SET_VRING_CALL:
-		user_set_vring_call(ctx, &msg);
+		user_set_vring_call(vid, &msg);
 		break;
 
 	case VHOST_USER_SET_VRING_ERR:
@@ -434,10 +413,10 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 		break;
 
 	case VHOST_USER_SET_VRING_ENABLE:
-		user_set_vring_enable(ctx, &msg.payload.state);
+		user_set_vring_enable(vid, &msg.payload.state);
 		break;
 	case VHOST_USER_SEND_RARP:
-		user_send_rarp(ctx, &msg);
+		user_send_rarp(vid, &msg);
 		break;
 
 	default:
@@ -446,50 +425,222 @@ vserver_message_handler(int connfd, void *dat, int *remove)
 	}
 }
 
-/**
- * Creates and initialise the vhost server.
- */
-int
-rte_vhost_driver_register(const char *path)
+static int
+create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
 {
-	struct vhost_server *vserver;
+	int fd;
 
-	pthread_mutex_lock(&g_vhost_server.server_mutex);
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+	RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
+		is_server ? "server" : "client", fd);
 
-	if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"error: the number of servers reaches maximum\n");
-		pthread_mutex_unlock(&g_vhost_server.server_mutex);
+	memset(un, 0, sizeof(*un));
+	un->sun_family = AF_UNIX;
+	strncpy(un->sun_path, path, sizeof(un->sun_path));
+
+	return fd;
+}
+
+static int
+vhost_user_create_server(struct vhost_user_socket *vsocket)
+{
+	int fd;
+	int ret;
+	struct sockaddr_un un;
+	const char *path = vsocket->path;
+
+	fd = create_unix_socket(path, &un, vsocket->is_server);
+	if (fd < 0)
 		return -1;
+
+	ret = bind(fd, (struct sockaddr *)&un, sizeof(un));
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to bind to %s: %s; remove it and try again\n",
+			path, strerror(errno));
+		goto err;
 	}
+	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
 
-	vserver = calloc(sizeof(struct vhost_server), 1);
-	if (vserver == NULL) {
-		pthread_mutex_unlock(&g_vhost_server.server_mutex);
-		return -1;
+	ret = listen(fd, MAX_VIRTIO_BACKLOG);
+	if (ret < 0)
+		goto err;
+
+	vsocket->listenfd = fd;
+	fdset_add(&vhost_user.fdset, fd, vhost_user_server_new_connection,
+		  NULL, vsocket);
+
+	return 0;
+
+err:
+	close(fd);
+	return -1;
+}
+
+struct vhost_user_reconnect {
+	struct sockaddr_un un;
+	int fd;
+	struct vhost_user_socket *vsocket;
+
+	TAILQ_ENTRY(vhost_user_reconnect) next;
+};
+
+TAILQ_HEAD(vhost_user_reconnect_tailq_list, vhost_user_reconnect);
+struct vhost_user_reconnect_list {
+	struct vhost_user_reconnect_tailq_list head;
+	pthread_mutex_t mutex;
+};
+
+static struct vhost_user_reconnect_list reconn_list;
+static pthread_t reconn_tid;
+
+static void *
+vhost_user_client_reconnect(void *arg __rte_unused)
+{
+	struct vhost_user_reconnect *reconn, *next;
+
+	while (1) {
+		pthread_mutex_lock(&reconn_list.mutex);
+
+		/*
+		 * An equal implementation of TAILQ_FOREACH_SAFE,
+		 * which does not exist on all platforms.
+		 */
+		for (reconn = TAILQ_FIRST(&reconn_list.head);
+		     reconn != NULL; reconn = next) {
+			next = TAILQ_NEXT(reconn, next);
+
+			if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
+				    sizeof(reconn->un)) < 0)
+				continue;
+
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"%s: connected\n", reconn->vsocket->path);
+			vhost_user_add_connection(reconn->fd, reconn->vsocket);
+			TAILQ_REMOVE(&reconn_list.head, reconn, next);
+			free(reconn);
+		}
+
+		pthread_mutex_unlock(&reconn_list.mutex);
+		sleep(1);
 	}
 
-	vserver->listenfd = uds_socket(path);
-	if (vserver->listenfd < 0) {
-		free(vserver);
-		pthread_mutex_unlock(&g_vhost_server.server_mutex);
+	return NULL;
+}
+
+static int
+vhost_user_reconnect_init(void)
+{
+	int ret;
+
+	pthread_mutex_init(&reconn_list.mutex, NULL);
+	TAILQ_INIT(&reconn_list.head);
+
+	ret = pthread_create(&reconn_tid, NULL,
+			     vhost_user_client_reconnect, NULL);
+	if (ret < 0)
+		RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
+
+	return ret;
+}
+
+static int
+vhost_user_create_client(struct vhost_user_socket *vsocket)
+{
+	int fd;
+	int ret;
+	struct sockaddr_un un;
+	const char *path = vsocket->path;
+	struct vhost_user_reconnect *reconn;
+
+	fd = create_unix_socket(path, &un, vsocket->is_server);
+	if (fd < 0)
 		return -1;
+
+	ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+	if (ret == 0) {
+		vhost_user_add_connection(fd, vsocket);
+		return 0;
 	}
 
-	vserver->path = strdup(path);
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"failed to connect to %s: %s\n",
+		path, strerror(errno));
 
-	fdset_add(&g_vhost_server.fdset, vserver->listenfd,
-		vserver_new_vq_conn, NULL, vserver);
+	if (!vsocket->reconnect) {
+		close(fd);
+		return -1;
+	}
 
-	g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver;
-	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+	RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+	reconn = malloc(sizeof(*reconn));
+	reconn->un = un;
+	reconn->fd = fd;
+	reconn->vsocket = vsocket;
+	pthread_mutex_lock(&reconn_list.mutex);
+	TAILQ_INSERT_TAIL(&reconn_list.head, reconn, next);
+	pthread_mutex_unlock(&reconn_list.mutex);
 
 	return 0;
 }
 
+/*
+ * Register a new vhost-user socket; here we could act as server
+ * (the default case), or client (when RTE_VHOST_USER_CLIENT) flag
+ * is set.
+ */
+int
+rte_vhost_driver_register(const char *path, uint64_t flags)
+{
+	int ret = -1;
+	struct vhost_user_socket *vsocket;
+
+	if (!path)
+		return -1;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+
+	if (vhost_user.vsocket_cnt == MAX_VHOST_SOCKET) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"error: the number of vhost sockets reaches maximum\n");
+		goto out;
+	}
+
+	vsocket = malloc(sizeof(struct vhost_user_socket));
+	if (!vsocket)
+		goto out;
+	memset(vsocket, 0, sizeof(struct vhost_user_socket));
+	vsocket->path = strdup(path);
+
+	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
+		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
+		if (vsocket->reconnect && reconn_tid == 0) {
+			if (vhost_user_reconnect_init() < 0)
+				goto out;
+		}
+		ret = vhost_user_create_client(vsocket);
+	} else {
+		vsocket->is_server = true;
+		ret = vhost_user_create_server(vsocket);
+	}
+	if (ret < 0) {
+		free(vsocket->path);
+		free(vsocket);
+		goto out;
+	}
+
+	vhost_user.vsockets[vhost_user.vsocket_cnt++] = vsocket;
+
+out:
+	pthread_mutex_unlock(&vhost_user.mutex);
+
+	return ret;
+}
 
 /**
- * Unregister the specified vhost server
+ * Unregister the specified vhost socket
  */
 int
 rte_vhost_driver_unregister(const char *path)
@@ -497,28 +648,29 @@ rte_vhost_driver_unregister(const char *path)
 	int i;
 	int count;
 
-	pthread_mutex_lock(&g_vhost_server.server_mutex);
-
-	for (i = 0; i < g_vhost_server.vserver_cnt; i++) {
-		if (!strcmp(g_vhost_server.server[i]->path, path)) {
-			fdset_del(&g_vhost_server.fdset,
-				g_vhost_server.server[i]->listenfd);
+	pthread_mutex_lock(&vhost_user.mutex);
 
-			close(g_vhost_server.server[i]->listenfd);
-			free(g_vhost_server.server[i]->path);
-			free(g_vhost_server.server[i]);
+	for (i = 0; i < vhost_user.vsocket_cnt; i++) {
+		if (!strcmp(vhost_user.vsockets[i]->path, path)) {
+			if (vhost_user.vsockets[i]->is_server) {
+				fdset_del(&vhost_user.fdset,
+					vhost_user.vsockets[i]->listenfd);
+				close(vhost_user.vsockets[i]->listenfd);
+				unlink(path);
+			}
 
-			unlink(path);
+			free(vhost_user.vsockets[i]->path);
+			free(vhost_user.vsockets[i]);
 
-			count = --g_vhost_server.vserver_cnt;
-			g_vhost_server.server[i] = g_vhost_server.server[count];
-			g_vhost_server.server[count] = NULL;
-			pthread_mutex_unlock(&g_vhost_server.server_mutex);
+			count = --vhost_user.vsocket_cnt;
+			vhost_user.vsockets[i] = vhost_user.vsockets[count];
+			vhost_user.vsockets[count] = NULL;
+			pthread_mutex_unlock(&vhost_user.mutex);
 
 			return 0;
 		}
 	}
-	pthread_mutex_unlock(&g_vhost_server.server_mutex);
+	pthread_mutex_unlock(&vhost_user.mutex);
 
 	return -1;
 }
@@ -526,6 +678,6 @@ rte_vhost_driver_unregister(const char *path)
 int
 rte_vhost_driver_session_start(void)
 {
-	fdset_event_dispatch(&g_vhost_server.fdset);
+	fdset_event_dispatch(&vhost_user.fdset);
 	return 0;
 }
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index e3bb4138..f5332396 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -38,15 +38,11 @@
 #include <linux/vhost.h>
 
 #include "rte_virtio_net.h"
-#include "fd_man.h"
-
-struct vhost_server {
-	char *path; /**< The path the uds is bind to. */
-	int listenfd;     /**< The listener sockfd. */
-};
 
 /* refer to hw/virtio/vhost-user.c */
 
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
 typedef enum VhostUserRequest {
 	VHOST_USER_NONE = 0,
 	VHOST_USER_GET_FEATURES = 1,
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index f5248bc4..e7c43479 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -43,7 +43,6 @@
 #include <rte_common.h>
 #include <rte_log.h>
 
-#include "virtio-net.h"
 #include "virtio-net-user.h"
 #include "vhost-net-user.h"
 #include "vhost-net.h"
@@ -64,9 +63,10 @@ static uint64_t
 get_blk_size(int fd)
 {
 	struct stat stat;
+	int ret;
 
-	fstat(fd, &stat);
-	return (uint64_t)stat.st_blksize;
+	ret = fstat(fd, &stat);
+	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
 static void
@@ -96,10 +96,14 @@ vhost_backend_cleanup(struct virtio_net *dev)
 		free(dev->mem);
 		dev->mem = NULL;
 	}
+	if (dev->log_addr) {
+		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+		dev->log_addr = 0;
+	}
 }
 
 int
-user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
 {
 	struct VhostUserMemory memory = pmsg->payload.memory;
 	struct virtio_memory_regions *pregion;
@@ -110,13 +114,15 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 	uint64_t alignment;
 
 	/* unmap old memory regions one by one*/
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
 	/* Remove from the data plane. */
-	if (dev->flags & VIRTIO_DEV_RUNNING)
-		notify_ops->destroy_device(dev);
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
 
 	if (dev->mem) {
 		free_mem_region(dev);
@@ -130,8 +136,8 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 		sizeof(struct orig_region_map) * memory.nregions);
 	if (dev->mem == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
-			dev->device_fh);
+			"(%d) failed to allocate memory for dev->mem\n",
+			dev->vid);
 		return -1;
 	}
 	dev->mem->nregions = memory.nregions;
@@ -162,6 +168,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 		 * aligned.
 		 */
 		alignment = get_blk_size(pmsg->fds[idx]);
+		if (alignment == (uint64_t)-1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"couldn't get hugepage size through fstat\n");
+			goto err_mmap;
+		}
 		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
 
 		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
@@ -252,7 +263,7 @@ virtio_is_ready(struct virtio_net *dev)
 }
 
 void
-user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+user_set_vring_call(int vid, struct VhostUserMsg *pmsg)
 {
 	struct vhost_vring_file file;
 
@@ -263,7 +274,7 @@ user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 		file.fd = pmsg->fds[0];
 	RTE_LOG(INFO, VHOST_CONFIG,
 		"vring call idx:%d file:%d\n", file.index, file.fd);
-	vhost_set_vring_call(ctx, &file);
+	vhost_set_vring_call(vid, &file);
 }
 
 
@@ -272,10 +283,13 @@ user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
  *  device is ready for packet processing.
  */
 void
-user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
 {
 	struct vhost_vring_file file;
-	struct virtio_net *dev = get_device(ctx);
+	struct virtio_net *dev = get_device(vid);
+
+	if (!dev)
+		return;
 
 	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
 	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
@@ -284,30 +298,32 @@ user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 		file.fd = pmsg->fds[0];
 	RTE_LOG(INFO, VHOST_CONFIG,
 		"vring kick idx:%d file:%d\n", file.index, file.fd);
-	vhost_set_vring_kick(ctx, &file);
+	vhost_set_vring_kick(vid, &file);
 
-	if (virtio_is_ready(dev) &&
-		!(dev->flags & VIRTIO_DEV_RUNNING))
-			notify_ops->new_device(dev);
+	if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
+		if (notify_ops->new_device(vid) == 0)
+			dev->flags |= VIRTIO_DEV_RUNNING;
+	}
 }
 
 /*
  * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
  */
 int
-user_get_vring_base(struct vhost_device_ctx ctx,
-	struct vhost_vring_state *state)
+user_get_vring_base(int vid, struct vhost_vring_state *state)
 {
-	struct virtio_net *dev = get_device(ctx);
+	struct virtio_net *dev = get_device(vid);
 
 	if (dev == NULL)
 		return -1;
 	/* We have to stop the queue (virtio) if it is running. */
-	if (dev->flags & VIRTIO_DEV_RUNNING)
-		notify_ops->destroy_device(dev);
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
 
 	/* Here we are safe to get the last used index */
-	vhost_get_vring_base(ctx, state->index, state);
+	vhost_get_vring_base(vid, state->index, state);
 
 	RTE_LOG(INFO, VHOST_CONFIG,
 		"vring base idx:%d file:%d\n", state->index, state->num);
@@ -329,19 +345,21 @@ user_get_vring_base(struct vhost_device_ctx ctx,
  * enable the virtio queue pair.
  */
 int
-user_set_vring_enable(struct vhost_device_ctx ctx,
-		      struct vhost_vring_state *state)
+user_set_vring_enable(int vid, struct vhost_vring_state *state)
 {
-	struct virtio_net *dev = get_device(ctx);
+	struct virtio_net *dev;
 	int enable = (int)state->num;
 
+	dev = get_device(vid);
+	if (dev == NULL)
+		return -1;
+
 	RTE_LOG(INFO, VHOST_CONFIG,
 		"set queue enable: %d to qp idx: %d\n",
 		enable, state->index);
 
-	if (notify_ops->vring_state_changed) {
-		notify_ops->vring_state_changed(dev, state->index, enable);
-	}
+	if (notify_ops->vring_state_changed)
+		notify_ops->vring_state_changed(vid, state->index, enable);
 
 	dev->virtqueue[state->index]->enabled = enable;
 
@@ -349,12 +367,11 @@ user_set_vring_enable(struct vhost_device_ctx ctx,
 }
 
 void
-user_set_protocol_features(struct vhost_device_ctx ctx,
-			   uint64_t protocol_features)
+user_set_protocol_features(int vid, uint64_t protocol_features)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
 		return;
 
@@ -362,15 +379,14 @@ user_set_protocol_features(struct vhost_device_ctx ctx,
 }
 
 int
-user_set_log_base(struct vhost_device_ctx ctx,
-		 struct VhostUserMsg *msg)
+user_set_log_base(int vid, struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev;
 	int fd = msg->fds[0];
 	uint64_t size, off;
 	void *addr;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (!dev)
 		return -1;
 
@@ -397,13 +413,21 @@ user_set_log_base(struct vhost_device_ctx ctx,
 	 * fail when offset is not page size aligned.
 	 */
 	addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
 	if (addr == MAP_FAILED) {
 		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
 		return -1;
 	}
 
-	/* TODO: unmap on stop */
-	dev->log_base = (uint64_t)(uintptr_t)addr + off;
+	/*
+	 * Free previously mapped log memory on occasionally
+	 * multiple VHOST_USER_SET_LOG_BASE.
+	 */
+	if (dev->log_addr) {
+		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+	}
+	dev->log_addr = (uint64_t)(uintptr_t)addr;
+	dev->log_base = dev->log_addr + off;
 	dev->log_size = size;
 
 	return 0;
@@ -418,12 +442,12 @@ user_set_log_base(struct vhost_device_ctx ctx,
  * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
  */
 int
-user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg)
+user_send_rarp(int vid, struct VhostUserMsg *msg)
 {
 	struct virtio_net *dev;
 	uint8_t *mac = (uint8_t *)&msg->payload.u64;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (!dev)
 		return -1;
 
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
index cefec162..e1b967b8 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -45,20 +45,18 @@
 					 (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
 					 (1ULL << VHOST_USER_PROTOCOL_F_RARP))
 
-int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+int user_set_mem_table(int, struct VhostUserMsg *);
 
-void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+void user_set_vring_call(int, struct VhostUserMsg *);
 
-void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+void user_set_vring_kick(int, struct VhostUserMsg *);
 
-void user_set_protocol_features(struct vhost_device_ctx ctx,
-				uint64_t protocol_features);
-int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *);
-int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *);
+void user_set_protocol_features(int vid, uint64_t protocol_features);
+int user_set_log_base(int vid, struct VhostUserMsg *);
+int user_send_rarp(int vid, struct VhostUserMsg *);
 
-int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+int user_get_vring_base(int, struct vhost_vring_state *);
 
-int user_set_vring_enable(struct vhost_device_ctx ctx,
-			  struct vhost_vring_state *state);
+int user_set_vring_enable(int vid, struct vhost_vring_state *state);
 
 #endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index d870ad97..1785695b 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -53,7 +53,6 @@
 #include <rte_virtio_net.h>
 
 #include "vhost-net.h"
-#include "virtio-net.h"
 
 #define MAX_VHOST_DEVICE	1024
 static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
@@ -108,15 +107,14 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
 	return vhost_va;
 }
 
-
 struct virtio_net *
-get_device(struct vhost_device_ctx ctx)
+get_device(int vid)
 {
-	struct virtio_net *dev = vhost_devices[ctx.fh];
+	struct virtio_net *dev = vhost_devices[vid];
 
 	if (unlikely(!dev)) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") device not found.\n", ctx.fh);
+			"(%d) device not found.\n", vid);
 	}
 
 	return dev;
@@ -233,7 +231,7 @@ alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
 
 /*
  * Reset some variables in device structure, while keeping few
- * others untouched, such as device_fh, ifname, virt_qp_nb: they
+ * others untouched, such as vid, ifname, virt_qp_nb: they
  * should be same unless the device is removed.
  */
 static void
@@ -255,7 +253,7 @@ reset_device(struct virtio_net *dev)
  * list.
  */
 int
-vhost_new_device(struct vhost_device_ctx ctx)
+vhost_new_device(void)
 {
 	struct virtio_net *dev;
 	int i;
@@ -263,8 +261,7 @@ vhost_new_device(struct vhost_device_ctx ctx)
 	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
 	if (dev == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for dev.\n",
-			ctx.fh);
+			"Failed to allocate memory for new dev.\n");
 		return -1;
 	}
 
@@ -279,7 +276,7 @@ vhost_new_device(struct vhost_device_ctx ctx)
 	}
 
 	vhost_devices[i] = dev;
-	dev->device_fh   = i;
+	dev->vid = i;
 
 	return i;
 }
@@ -289,30 +286,31 @@ vhost_new_device(struct vhost_device_ctx ctx)
  * cleanup the device and remove it from device configuration linked list.
  */
 void
-vhost_destroy_device(struct vhost_device_ctx ctx)
+vhost_destroy_device(int vid)
 {
-	struct virtio_net *dev = get_device(ctx);
+	struct virtio_net *dev = get_device(vid);
 
 	if (dev == NULL)
 		return;
 
-	if (dev->flags & VIRTIO_DEV_RUNNING)
-		notify_ops->destroy_device(dev);
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
 
 	cleanup_device(dev, 1);
 	free_device(dev);
 
-	vhost_devices[ctx.fh] = NULL;
+	vhost_devices[vid] = NULL;
 }
 
 void
-vhost_set_ifname(struct vhost_device_ctx ctx,
-	const char *if_name, unsigned int if_len)
+vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
 {
 	struct virtio_net *dev;
 	unsigned int len;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return;
 
@@ -320,6 +318,7 @@ vhost_set_ifname(struct vhost_device_ctx ctx,
 		sizeof(dev->ifname) : if_len;
 
 	strncpy(dev->ifname, if_name, len);
+	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
 }
 
 
@@ -329,11 +328,11 @@ vhost_set_ifname(struct vhost_device_ctx ctx,
  * the device hasn't been initialised.
  */
 int
-vhost_set_owner(struct vhost_device_ctx ctx)
+vhost_set_owner(int vid)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -344,16 +343,18 @@ vhost_set_owner(struct vhost_device_ctx ctx)
  * Called from CUSE IOCTL: VHOST_RESET_OWNER
  */
 int
-vhost_reset_owner(struct vhost_device_ctx ctx)
+vhost_reset_owner(int vid)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
-	if (dev->flags & VIRTIO_DEV_RUNNING)
-		notify_ops->destroy_device(dev);
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
 
 	cleanup_device(dev, 0);
 	reset_device(dev);
@@ -365,11 +366,11 @@ vhost_reset_owner(struct vhost_device_ctx ctx)
  * The features that we support are requested.
  */
 int
-vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu)
+vhost_get_features(int vid, uint64_t *pu)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -383,13 +384,11 @@ vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu)
  * We receive the negotiated features supported by us and the virtio device.
  */
 int
-vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu)
+vhost_set_features(int vid, uint64_t *pu)
 {
 	struct virtio_net *dev;
-	uint16_t vhost_hlen;
-	uint16_t i;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 	if (*pu & ~VHOST_FEATURES)
@@ -398,23 +397,16 @@ vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu)
 	dev->features = *pu;
 	if (dev->features &
 		((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) {
-		vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	} else {
-		vhost_hlen = sizeof(struct virtio_net_hdr);
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
 	}
 	LOG_DEBUG(VHOST_CONFIG,
-		"(%"PRIu64") Mergeable RX buffers %s, virtio 1 %s\n",
-		dev->device_fh,
+		"(%d) mergeable RX buffers %s, virtio 1 %s\n",
+		dev->vid,
 		(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
 		(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off");
 
-	for (i = 0; i < dev->virt_qp_nb; i++) {
-		uint16_t base_idx = i * VIRTIO_QNUM;
-
-		dev->virtqueue[base_idx + VIRTIO_RXQ]->vhost_hlen = vhost_hlen;
-		dev->virtqueue[base_idx + VIRTIO_TXQ]->vhost_hlen = vhost_hlen;
-	}
-
 	return 0;
 }
 
@@ -423,12 +415,11 @@ vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu)
  * The virtio device sends us the size of the descriptor ring.
  */
 int
-vhost_set_vring_num(struct vhost_device_ctx ctx,
-	struct vhost_vring_state *state)
+vhost_set_vring_num(int vid, struct vhost_vring_state *state)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -509,7 +500,7 @@ numa_realloc(struct virtio_net *dev, int index)
 out:
 	dev->virtqueue[index] = vq;
 	dev->virtqueue[index + 1] = vq + 1;
-	vhost_devices[dev->device_fh] = dev;
+	vhost_devices[dev->vid] = dev;
 
 	return dev;
 }
@@ -527,12 +518,12 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
  * This function then converts these to our address space.
  */
 int
-vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
+vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if ((dev == NULL) || (dev->mem == NULL))
 		return -1;
 
@@ -544,8 +535,8 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 			addr->desc_user_addr);
 	if (vq->desc == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find desc ring address.\n",
-			dev->device_fh);
+			"(%d) failed to find desc ring address.\n",
+			dev->vid);
 		return -1;
 	}
 
@@ -556,8 +547,8 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 			addr->avail_user_addr);
 	if (vq->avail == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find avail ring address.\n",
-			dev->device_fh);
+			"(%d) failed to find avail ring address.\n",
+			dev->vid);
 		return -1;
 	}
 
@@ -565,21 +556,29 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 			addr->used_user_addr);
 	if (vq->used == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to find used ring address.\n",
-			dev->device_fh);
+			"(%d) failed to find used ring address.\n",
+			dev->vid);
 		return -1;
 	}
 
+	if (vq->last_used_idx != vq->used->idx) {
+		RTE_LOG(WARNING, VHOST_CONFIG,
+			"last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+			"some packets maybe resent for Tx and dropped for Rx\n",
+			vq->last_used_idx, vq->used->idx);
+		vq->last_used_idx     = vq->used->idx;
+	}
+
 	vq->log_guest_addr = addr->log_guest_addr;
 
-	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n",
-			dev->device_fh, vq->desc);
-	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n",
-			dev->device_fh, vq->avail);
-	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n",
-			dev->device_fh, vq->used);
-	LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %"PRIx64"\n",
-			dev->device_fh, vq->log_guest_addr);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
+			dev->vid, vq->desc);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
+			dev->vid, vq->avail);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
+			dev->vid, vq->used);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
+			dev->vid, vq->log_guest_addr);
 
 	return 0;
 }
@@ -589,18 +588,16 @@ vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
  * The virtio device sends us the available ring last used index.
  */
 int
-vhost_set_vring_base(struct vhost_device_ctx ctx,
-	struct vhost_vring_state *state)
+vhost_set_vring_base(int vid, struct vhost_vring_state *state)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
 	dev->virtqueue[state->index]->last_used_idx = state->num;
-	dev->virtqueue[state->index]->last_used_idx_res = state->num;
 
 	return 0;
 }
@@ -610,12 +607,12 @@ vhost_set_vring_base(struct vhost_device_ctx ctx,
  * We send the virtio device our available ring last used index.
  */
 int
-vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
+vhost_get_vring_base(int vid, uint32_t index,
 	struct vhost_vring_state *state)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -633,13 +630,13 @@ vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
  * copied into our process space.
  */
 int
-vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+vhost_set_vring_call(int vid, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
 	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -670,12 +667,12 @@ vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
  * This fd gets copied into our process space.
  */
 int
-vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+vhost_set_vring_kick(int vid, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 	struct vhost_virtqueue *vq;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -700,11 +697,11 @@ vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
  * The device will still exist in the device configuration linked list.
  */
 int
-vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+vhost_set_backend(int vid, struct vhost_vring_file *file)
 {
 	struct virtio_net *dev;
 
-	dev = get_device(ctx);
+	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
@@ -716,20 +713,98 @@ vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
 	 * we add the device.
 	 */
 	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
-		if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
-			((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED)) {
-			return notify_ops->new_device(dev);
+		if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
+		    dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
+			if (notify_ops->new_device(vid) < 0)
+				return -1;
+			dev->flags |= VIRTIO_DEV_RUNNING;
 		}
-	/* Otherwise we remove it. */
-	} else
-		if (file->fd == VIRTIO_DEV_STOPPED)
-			notify_ops->destroy_device(dev);
+	} else if (file->fd == VIRTIO_DEV_STOPPED) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
+
+	return 0;
+}
+
+int
+rte_vhost_get_numa_node(int vid)
+{
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	struct virtio_net *dev = get_device(vid);
+	int numa_node;
+	int ret;
+
+	if (dev == NULL)
+		return -1;
+
+	ret = get_mempolicy(&numa_node, NULL, 0, dev,
+			    MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to query numa node: %d\n", vid, ret);
+		return -1;
+	}
+
+	return numa_node;
+#else
+	RTE_SET_USED(vid);
+	return -1;
+#endif
+}
+
+uint32_t
+rte_vhost_get_queue_num(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return 0;
+
+	return dev->virt_qp_nb;
+}
+
+int
+rte_vhost_get_ifname(int vid, char *buf, size_t len)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return -1;
+
+	len = RTE_MIN(len, sizeof(dev->ifname));
+
+	strncpy(buf, dev->ifname, len);
+	buf[len - 1] = '\0';
+
 	return 0;
 }
 
-int rte_vhost_enable_guest_notification(struct virtio_net *dev,
-	uint16_t queue_id, int enable)
+uint16_t
+rte_vhost_avail_entries(int vid, uint16_t queue_id)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (!dev)
+		return 0;
+
+	vq = dev->virtqueue[queue_id];
+	if (!vq->enabled)
+		return 0;
+
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
+}
+
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
 {
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return -1;
+
 	if (enable) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"guest notification isn't supported.\n");
author	Christian Ehrhardt <christian.ehrhardt@canonical.com>	2016-07-06 09:22:35 +0200
committer	Christian Ehrhardt <christian.ehrhardt@canonical.com>	2016-07-06 16:09:40 +0200
commit	8b25d1ad5d2264bdfc2818c7bda74ee2697df6db (patch)
tree	8c3c769777f7e66a2d1ba7dd7651b563cfde370b /lib
parent	97f17497d162afdb82c8704bf097f0fee3724b2e (diff)