Imported Upstream version 16.11

Change-Id: I1944c65ddc88a9ad70f8c0eb6731552b84fbcb77 Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
author: Christian Ehrhardt <christian.ehrhardt@canonical.com> 2016-12-08 14:07:29 +0100
committer: Christian Ehrhardt <christian.ehrhardt@canonical.com> 2016-12-08 14:10:05 +0100
commit: 6b3e017e5d25f15da73f7700f7f2ac553ef1a2e9 (patch)
tree: 1b1fb3f903b2282e261ade69e3c17952b3fd3464 /lib
parent: 32e04ea00cd159613e04acef75e52bfca6eeff2f (diff)
235 files changed, 7717 insertions, 13739 deletions
diff --git a/lib/Makefile b/lib/Makefile
index ca7c02fd..990f23a4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -61,7 +61,6 @@ DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
-DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
 endif
 
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
index 9803e9dd..d05be665 100644
--- a/lib/librte_acl/Makefile
+++ b/lib/librte_acl/Makefile
@@ -52,6 +52,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
 ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
 CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
+else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c
 else
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
 #check if flag for SSE4.1 is already on, if not set it up manually
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
index 09d67841..6664a55e 100644
--- a/lib/librte_acl/acl.h
+++ b/lib/librte_acl/acl.h
@@ -234,6 +234,10 @@ int
 rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	uint32_t *results, uint32_t num, uint32_t categories);
 
+int
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h
index b2fc42c6..024f3931 100644
--- a/lib/librte_acl/acl_run.h
+++ b/lib/librte_acl/acl_run.h
@@ -39,7 +39,9 @@
 
 #define MAX_SEARCHES_AVX16	16
 #define MAX_SEARCHES_SSE8	8
+#define MAX_SEARCHES_ALTIVEC8	8
 #define MAX_SEARCHES_SSE4	4
+#define MAX_SEARCHES_ALTIVEC4	4
 #define MAX_SEARCHES_SCALAR	2
 
 #define GET_NEXT_4BYTES(prm, idx)	\
diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_acl/acl_run_altivec.c
index a425f7a8..35235260 100644
--- a/lib/librte_eal/bsdapp/eal/eal_log.c
+++ b/lib/librte_acl/acl_run_altivec.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright (C) IBM Corporation 2016.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -31,27 +31,17 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <stdio.h>
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include <eal_private.h>
-
-/*
- * set the log to default function, called during eal init process,
- * once memzones are available.
- */
-int
-rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused)
-{
-	if (rte_eal_common_log_init(stderr) < 0)
-		return -1;
-	return 0;
-}
+#include "acl_run_altivec.h"
 
 int
-rte_eal_log_early_init(void)
+rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories)
 {
-	rte_openlog_stream(stderr);
-	return 0;
+	if (likely(num >= MAX_SEARCHES_ALTIVEC8))	/* 8 or more packets */
+		return search_altivec_8(ctx, data, results, num, categories);
+	else if (num >= MAX_SEARCHES_ALTIVEC4)		/* 4 to 7 packets */
+		return search_altivec_4(ctx, data, results, num, categories);
+	else			/* fewer than 4 packets: scalar path */
+		return rte_acl_classify_scalar(ctx, data, results, num,
+			categories);
 }
diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h
new file mode 100644
index 00000000..7d329bcf
--- /dev/null
+++ b/lib/librte_acl/acl_run_altivec.h
@@ -0,0 +1,329 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+/*
+ * Constant vectors used by transition4().
+ * Declared static because this header defines the object: with external
+ * linkage every file including it would emit its own altivec_acl_const
+ * symbol and the link would see duplicate definitions.
+ */
+static struct _altivec_acl_const {
+	rte_xmm_t xmm_shuffle_input;	/* vec_perm control for the input */
+	rte_xmm_t xmm_index_mask;	/* RTE_ACL_NODE_INDEX in each lane */
+	rte_xmm_t xmm_ones_16;		/* 16-bit ones for vec_msum */
+	rte_xmm_t range_base;		/* added to (input >> 30), DFA path */
+} altivec_acl_const  __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+	{ .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} },
+	{
+		.u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+	},
+	{ .u16 = {1, 1, 1, 1, 1, 1, 1, 1} },
+	{ .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} },
+};
+
+/*
+ * Resolve priority for multiple results (altivec version).
+ * This consists of comparing the priority of the current traversal with
+ * the running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_altivec(uint64_t transition, int n,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	const struct rte_acl_match_results *p, uint32_t categories)
+{
+	uint32_t x;
+	xmm_t results, priority, results1, priority1;
+	vector bool int selector;
+	xmm_t *saved_results, *saved_priority;
+
+	for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+		/* each pass handles one xmm_t of results and priorities */
+		saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+		saved_priority =
+			(xmm_t *)(&parms[n].cmplt->priority[x]);
+
+		/* get results and priorities for completed trie */
+		results = *(const xmm_t *)&p[transition].results[x];
+		priority = *(const xmm_t *)&p[transition].priority[x];
+
+		/* if this is not the first completed trie */
+		if (parms[n].cmplt->count != ctx->num_tries) {
+
+			/* get running best results and their priorities */
+			results1 = *saved_results;
+			priority1 = *saved_priority;
+
+			/* saved result kept only if its priority is greater */
+			selector = vec_cmpgt(priority1, priority);
+			results = vec_sel(results, results1, selector);
+			priority = vec_sel(priority, priority1,
+				selector);
+		}
+
+		/* save running best results and their priorities */
+		*saved_results = results;
+		*saved_priority = priority;
+	}
+}
+
+/*
+ * OR the 4 transitions together and return their RTE_ACL_NODE_MATCH bits.
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+	return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
+}
+/* Run acl_match_check() on flows slot..slot+3 while any has a match bit */
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, uint64_t transitions[])
+{
+	while (check_any_match_x4(transitions)) {
+		transitions[0] = acl_match_check(transitions[0], slot, ctx,
+			parms, flows, resolve_priority_altivec);
+		transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+			parms, flows, resolve_priority_altivec);
+		transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+			parms, flows, resolve_priority_altivec);
+		transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+			parms, flows, resolve_priority_altivec);
+	}
+}
+
+/*
+ * Process 4 transitions (2 per vector, in 2 vector registers) in parallel
+ */
+static inline __attribute__((optimize("O2"))) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+	xmm_t *indices1, xmm_t *indices2)
+{
+	xmm_t addr, tr_lo, tr_hi;
+	xmm_t in, node_type, r, t;
+	xmm_t dfa_ofs, quad_ofs;
+	xmm_t *index_mask, *tp;
+	vector bool int dfa_msk;
+	vector signed char zeroes = {};
+	union {
+		uint64_t d64[2];
+		uint32_t d32[4];
+	} v;
+
+	/* Move low 32 into tr_lo and high 32 into tr_hi */
+	tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2],
+			(*indices2)[0], (*indices2)[2]};
+	tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3],
+			(*indices2)[1], (*indices2)[3]};
+
+	 /* Calculate the address (array index) for all 4 transitions. */
+	index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32;
+	t = vec_xor(*index_mask, *index_mask);	/* t = all zeroes */
+	in = vec_perm(next_input, (xmm_t){},
+		*(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input);
+
+	/* Calc node type (bits outside index_mask) and node addr */
+	node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo);
+	addr = vec_and(tr_lo, *index_mask);
+
+	/* mask for DFA type(0) nodes */
+	dfa_msk = vec_cmpeq(node_type, t);
+
+	/* DFA calculations. */
+	r = vec_sr(in, (vector unsigned int){30, 30, 30, 30});
+	tp = (xmm_t *)&altivec_acl_const.range_base.u32;
+	r = vec_add(r, *tp);
+	t = vec_sr(in, (vector unsigned int){24, 24, 24, 24});
+	r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16},
+		(vector unsigned char)r);
+
+	dfa_ofs = vec_sub(t, r);
+
+	/* QUAD/SINGLE calculations. */
+	t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi);
+	t = (xmm_t)vec_sel(
+		vec_sel(
+			(vector signed char)vec_sub(
+				zeroes, (vector signed char)t),
+			(vector signed char)t,
+			vec_cmpgt((vector signed char)t, zeroes)),
+		zeroes,
+		vec_cmpeq((vector signed char)t, zeroes));
+
+	t = (xmm_t)vec_msum((vector signed char)t,
+		(vector unsigned char)t, (xmm_t){});
+	quad_ofs = (xmm_t)vec_msum((vector signed short)t,
+		*(vector signed short *)&altivec_acl_const.xmm_ones_16.u16,
+		(xmm_t){});
+
+	/* blend DFA and QUAD/SINGLE. */
+	t = vec_sel(quad_ofs, dfa_ofs, dfa_msk);
+
+	/* calculate address for next transitions. */
+	addr = vec_add(addr, t);
+	/* Fetch the 4 next transitions; two 64-bit entries per vector. */
+	v.d64[0] = (uint64_t)trans[addr[0]];
+	v.d64[1] = (uint64_t)trans[addr[1]];
+	*indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+	v.d64[0] = (uint64_t)trans[addr[2]];
+	v.d64[1] = (uint64_t)trans[addr[3]];
+	*indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
+	/* Shift each 32-bit lane right by one byte for the next call. */
+	return vec_sr(next_input,
+		(vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel; always returns 0.
+ */
+static inline int
+search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	/* transition4() accesses index_array through xmm_t pointers */
+	uint64_t index_array[MAX_SEARCHES_ALTIVEC8]
+		__attribute__((aligned(sizeof(xmm_t))));
+	struct completion cmplt[MAX_SEARCHES_ALTIVEC8];
+	struct parms parms[MAX_SEARCHES_ALTIVEC8];
+	xmm_t input0, input1;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]);
+	acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]);
+
+	while (flows.started > 0) {
+		/* Gather 4 bytes of input data for each stream. */
+		input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+				GET_NEXT_4BYTES(parms, 1),
+				GET_NEXT_4BYTES(parms, 2),
+				GET_NEXT_4BYTES(parms, 3)};
+
+		input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4),
+				GET_NEXT_4BYTES(parms, 5),
+				GET_NEXT_4BYTES(parms, 6),
+				GET_NEXT_4BYTES(parms, 7)};
+
+		/* Process the 4 bytes of input on each stream. */
+		input0 = transition4(input0, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input1 = transition4(input1, flows.trans,
+			(xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+		input0 = transition4(input0, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input1 = transition4(input1, flows.trans,
+			(xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+		input0 = transition4(input0, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input1 = transition4(input1, flows.trans,
+			(xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+		input0 = transition4(input0, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input1 = transition4(input1, flows.trans,
+			(xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows,
+			(uint64_t *)&index_array[0]);
+		acl_match_check_x4(4, ctx, parms, &flows,
+			(uint64_t *)&index_array[4]);
+	}
+
+	return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel; always returns 0.
+ */
+static inline int
+search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	/* transition4() accesses index_array through xmm_t pointers */
+	uint64_t index_array[MAX_SEARCHES_ALTIVEC4]
+		__attribute__((aligned(sizeof(xmm_t))));
+	struct completion cmplt[MAX_SEARCHES_ALTIVEC4];
+	struct parms parms[MAX_SEARCHES_ALTIVEC4];
+	xmm_t input;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/* Check for any matches. */
+	acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+	while (flows.started > 0) {
+		/* Gather 4 bytes of input data for each stream. */
+		input = (xmm_t){GET_NEXT_4BYTES(parms, 0),
+				GET_NEXT_4BYTES(parms, 1),
+				GET_NEXT_4BYTES(parms, 2),
+				GET_NEXT_4BYTES(parms, 3)};
+		/* Process the 4 bytes of input on each stream. */
+		input = transition4(input, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input = transition4(input, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input = transition4(input, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+		input = transition4(input, flows.trans,
+			(xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows, index_array);
+	}
+
+	return 0;
+}
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
index 4ba9786b..8b7e92ce 100644
--- a/lib/librte_acl/rte_acl.c
+++ b/lib/librte_acl/rte_acl.c
@@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
 	return -ENOTSUP;
 }
 
+int __attribute__ ((weak))
+rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
+	__rte_unused const uint8_t **data,
+	__rte_unused uint32_t *results,
+	__rte_unused uint32_t num,
+	__rte_unused uint32_t categories)
+{
+	return -ENOTSUP; /* weak stub; acl_run_altivec.c has the real one */
+}
+
 static const rte_acl_classify_t classify_fns[] = {
 	[RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
 	[RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
 	[RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
 	[RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 	[RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
+	[RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
 };
 
 /* by default, use always available scalar code path. */
@@ -119,6 +130,8 @@ rte_acl_init(void)
 #elif defined(RTE_ARCH_ARM)
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
 		alg =  RTE_ACL_CLASSIFY_NEON;
+#elif defined(RTE_ARCH_PPC_64)
+	alg = RTE_ACL_CLASSIFY_ALTIVEC;
 #else
 #ifdef CC_AVX2_SUPPORT
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
index 0979a098..caa91f7e 100644
--- a/lib/librte_acl/rte_acl.h
+++ b/lib/librte_acl/rte_acl.h
@@ -144,7 +144,7 @@ struct rte_acl_rule_data {
 	struct rte_acl_field field[fld_num];         \
 }
 
-RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+RTE_ACL_RULE_DEF(rte_acl_rule,);
 
 #define	RTE_ACL_RULE_SZ(fld_num)	\
 	(sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
@@ -271,6 +271,7 @@ enum rte_acl_classify_alg {
 	RTE_ACL_CLASSIFY_SSE = 2,     /**< requires SSE4.1 support. */
 	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
 	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
+	RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
 	RTE_ACL_CLASSIFY_NUM          /* should always be the last one. */
 };
 
diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h
index f649836c..b40e6a13 100644
--- a/lib/librte_cfgfile/rte_cfgfile.h
+++ b/lib/librte_cfgfile/rte_cfgfile.h
@@ -34,6 +34,8 @@
 #ifndef __INCLUDE_RTE_CFGFILE_H__
 #define __INCLUDE_RTE_CFGFILE_H__
 
+#include <stddef.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -86,7 +88,7 @@ struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags);
 * @param length
 *   Maximum section name length
 * @return
-*   0 on success, error code otherwise
+*   Number of sections
 */
 int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
 	size_t length);
@@ -100,13 +102,13 @@ int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name,
 * @param cfg
 *   Config file
 * @param sections
-*   Array containing section names after successful invocation. Each elemen
+*   Array containing section names after successful invocation. Each element
 *   of this array should be preallocated by the user with at least
 *   CFG_NAME_LEN characters.
 * @param max_sections
 *   Maximum number of section names to be stored in sections array
 * @return
-*   0 on success, error code otherwise
+*   Number of populated section names
 */
 int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[],
 	int max_sections);
@@ -134,12 +136,13 @@ int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname);
 * @param sectionname
 *   Section name
 * @return
-*   Number of entries in section
+*   Number of entries in section on success, -1 otherwise
 */
 int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
 	const char *sectionname);
 
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -154,14 +157,15 @@ int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg,
 * @param max_entries
 *   Maximum number of section entries to be stored in entries array
 * @return
-*   0 on success, error code otherwise
+*   Number of entries populated on success, -1 otherwise
 */
 int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
 	const char *sectionname,
 	struct rte_cfgfile_entry *entries,
 	int max_entries);
 
-/** Get section entries as key-value pairs
+/**
+* Get section entries as key-value pairs
 *
 * The index of a section is the same as the index of its name in the
 * result of rte_cfgfile_sections. This API can be used when there are
@@ -180,7 +184,7 @@ int rte_cfgfile_section_entries(struct rte_cfgfile *cfg,
 * @param max_entries
 *   Maximum number of section entries to be stored in entries array
 * @return
-*   Number of entries populated on success, negative error code otherwise
+*   Number of entries populated on success, -1 otherwise
 */
 int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
 	int index,
@@ -188,7 +192,8 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
 	struct rte_cfgfile_entry *entries,
 	int max_entries);
 
-/** Get value of the named entry in named config file section
+/**
+* Get value of the named entry in named config file section
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -200,13 +205,14 @@ int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg,
 * @param entryname
 *   Entry name
 * @return
-*   Entry value
+*   Entry value on success, NULL otherwise
 */
 const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
 	const char *sectionname,
 	const char *entryname);
 
-/** Check if given entry exists in named config file section
+/**
+* Check if given entry exists in named config file section
 *
 * If multiple sections have the given name this function operates on the
 * first one.
@@ -223,12 +229,13 @@ const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg,
 int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname,
 	const char *entryname);
 
-/** Close config file
+/**
+* Close config file
 *
 * @param cfg
 *   Config file
 * @return
-*   0 on success, error code otherwise
+*   0 on success, -1 otherwise
 */
 int rte_cfgfile_close(struct rte_cfgfile *cfg);
 
diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h
index 2578ca81..65d73b01 100644
--- a/lib/librte_cmdline/cmdline.h
+++ b/lib/librte_cmdline/cmdline.h
@@ -63,6 +63,7 @@
 
 #include <termios.h>
 #include <cmdline_rdline.h>
+#include <cmdline_parse.h>
 
 /**
  * @file
diff --git a/lib/librte_cmdline/cmdline_parse_portlist.h b/lib/librte_cmdline/cmdline_parse_portlist.h
index 73d70e05..058df3ee 100644
--- a/lib/librte_cmdline/cmdline_parse_portlist.h
+++ b/lib/librte_cmdline/cmdline_parse_portlist.h
@@ -61,6 +61,7 @@
 #ifndef _PARSE_PORTLIST_H_
 #define _PARSE_PORTLIST_H_
 
+#include <stdint.h>
 #include <cmdline_parse.h>
 
 #ifdef __cplusplus
diff --git a/lib/librte_cmdline/cmdline_socket.h b/lib/librte_cmdline/cmdline_socket.h
index 8cc2dfbc..aa6068e7 100644
--- a/lib/librte_cmdline/cmdline_socket.h
+++ b/lib/librte_cmdline/cmdline_socket.h
@@ -61,6 +61,9 @@
 #ifndef _CMDLINE_SOCKET_H_
 #define _CMDLINE_SOCKET_H_
 
+#include <cmdline_parse.h>
+#include <cmdline.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index 314a0466..aebf5d9f 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_cryptodev.a
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 # build flags
 CFLAGS += -O3
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 5bc3eaa7..90195188 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -48,6 +48,7 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_memory.h>
 #include <rte_mempool.h>
+#include <rte_common.h>
 
 #include "rte_crypto_sym.h"
 
@@ -111,6 +112,7 @@ struct rte_crypto_op {
 	void *opaque_data;
 	/**< Opaque pointer for user data */
 
+	RTE_STD_C11
 	union {
 		struct rte_crypto_sym_op *sym;
 		/**< Symmetric operation parameters */
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index d9bd8210..d3d38e4f 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -51,6 +51,7 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_memory.h>
 #include <rte_mempool.h>
+#include <rte_common.h>
 
 
 /** Symmetric Cipher Algorithms */
@@ -83,11 +84,11 @@ enum rte_crypto_cipher_algorithm {
 	/**< AES algorithm in F8 mode */
 	RTE_CRYPTO_CIPHER_AES_GCM,
 	/**< AES algorithm in GCM mode. When this cipher algorithm is used the
-	 * *RTE_CRYPTO_AUTH_AES_GCM* element of the
-	 * *rte_crypto_auth_algorithm* enum MUST be used to set up the related
-	 * *rte_crypto_auth_setup_data* structure in the session context or in
-	 * the op_params of the crypto operation structure in the case of a
-	 * session-less crypto operation.
+	 * *RTE_CRYPTO_AUTH_AES_GCM* or *RTE_CRYPTO_AUTH_AES_GMAC* element
+	 * of the *rte_crypto_auth_algorithm* enum MUST be used to set up
+	 * the related *rte_crypto_auth_setup_data* structure in the session
+	 * context or in the op_params of the crypto operation structure
+	 * in the case of a session-less crypto operation.
 	 */
 	RTE_CRYPTO_CIPHER_AES_XTS,
 	/**< AES algorithm in XTS mode */
@@ -96,10 +97,10 @@ enum rte_crypto_cipher_algorithm {
 	/**< (A)RC4 cipher algorithm */
 
 	RTE_CRYPTO_CIPHER_KASUMI_F8,
-	/**< Kasumi algorithm in F8 mode */
+	/**< KASUMI algorithm in F8 mode */
 
 	RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
-	/**< SNOW3G algorithm in UEA2 mode */
+	/**< SNOW 3G algorithm in UEA2 mode */
 
 	RTE_CRYPTO_CIPHER_ZUC_EEA3,
 	/**< ZUC algorithm in EEA3 mode */
@@ -203,7 +204,7 @@ enum rte_crypto_auth_algorithm {
 	/**< AES XCBC algorithm. */
 
 	RTE_CRYPTO_AUTH_KASUMI_F9,
-	/**< Kasumi algorithm in F9 mode. */
+	/**< KASUMI algorithm in F9 mode. */
 
 	RTE_CRYPTO_AUTH_MD5,
 	/**< MD5 algorithm */
@@ -232,7 +233,7 @@ enum rte_crypto_auth_algorithm {
 	/**< HMAC using 512 bit SHA algorithm. */
 
 	RTE_CRYPTO_AUTH_SNOW3G_UIA2,
-	/**< SNOW3G algorithm in UIA2 mode. */
+	/**< SNOW 3G algorithm in UIA2 mode. */
 
 	RTE_CRYPTO_AUTH_ZUC_EIA3,
 	/**< ZUC algorithm in EIA3 mode */
@@ -290,7 +291,7 @@ struct rte_crypto_auth_xform {
 	 * This field must be specified when the hash algorithm is one of the
 	 * following:
 	 *
-	 * - For SNOW3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
+	 * - For SNOW 3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the
 	 *   length of the IV (which should be 16).
 	 *
 	 * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM).  In this case, this is
@@ -307,8 +308,8 @@ struct rte_crypto_auth_xform {
 	 * @note
 	 *  For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation
 	 *  this field is not used and should be set to 0. Instead the length
-	 *  of the AAD data is specified in the message length to hash field of
-	 *  the rte_crypto_sym_op_data structure.
+	 *  of the AAD data is specified in the additional authentication
+	 *  data length field of the rte_crypto_sym_op_data structure.
 	 */
 };
 
@@ -333,6 +334,7 @@ struct rte_crypto_sym_xform {
 	/**< next xform in chain */
 	enum rte_crypto_sym_xform_type type
 	; /**< xform type */
+	RTE_STD_C11
 	union {
 		struct rte_crypto_auth_xform auth;
 		/**< Authentication / hash xform */
@@ -364,6 +366,25 @@ struct rte_cryptodev_sym_session;
  * it must have a valid *rte_mbuf* structure attached, via m_src parameter,
  * which contains the source data which the crypto operation is to be performed
  * on.
+ * While the mbuf is in use by a crypto operation no part of the mbuf should be
+ * changed by the application as the device may read or write to any part of the
+ * mbuf. In the case of hardware crypto devices some or all of the mbuf
+ * may be DMAed in and out of the device, overwriting the original data,
+ * though only the part specified by the rte_crypto_sym_op for transformation
+ * will be changed.
+ * Out-of-place (OOP) operation, where the source mbuf is different to the
+ * destination mbuf, is a special case. Data will be copied from m_src to m_dst.
+ * The part copied includes all the parts of the source mbuf that will be
+ * operated on, based on the cipher.data.offset+cipher.data.length and
+ * auth.data.offset+auth.data.length values in the rte_crypto_sym_op. The part
+ * indicated by the cipher parameters will be transformed, any extra data around
+ * this indicated by the auth parameters will be copied unchanged from source to
+ * destination mbuf.
+ * Also in OOP operation the cipher.data.offset and auth.data.offset apply to
+ * both source and destination mbufs. As these offsets are relative to the
+ * data_off parameter in each mbuf this can result in the data written to the
+ * destination buffer being at a different alignment, relative to buffer start,
+ * to the data in the source buffer.
  */
 struct rte_crypto_sym_op {
 	struct rte_mbuf *m_src;	/**< source mbuf */
@@ -371,6 +392,7 @@ struct rte_crypto_sym_op {
 
 	enum rte_crypto_sym_op_sess_type sess_type;
 
+	RTE_STD_C11
 	union {
 		struct rte_cryptodev_sym_session *session;
 		/**< Handle for the initialised session context */
@@ -388,8 +410,9 @@ struct rte_crypto_sym_op {
 			  * this location.
 			  *
 			  * @note
-			  * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2
-			  * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
+			  * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+			  * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+			  * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
 			  * this field should be in bits.
 			  */
 
@@ -413,8 +436,9 @@ struct rte_crypto_sym_op {
 			  * field should be set to 0.
 			  *
 			  * @note
-			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2
-			  * and KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8,
+			  * For SNOW 3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+			  * KASUMI @ RTE_CRYPTO_CIPHER_KASUMI_F8
+			  * and ZUC @ RTE_CRYPTO_CIPHER_ZUC_EEA3,
 			  * this field should be in bits.
 			  */
 		} data; /**< Data offsets and length for ciphering */
@@ -423,8 +447,8 @@ struct rte_crypto_sym_op {
 			uint8_t *data;
 			/**< Initialisation Vector or Counter.
 			 *
-			 * - For block ciphers in CBC or F8 mode, or for Kasumi
-			 * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+			 * - For block ciphers in CBC or F8 mode, or for KASUMI
+			 * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
 			 * Initialisation Vector (IV) value.
 			 *
 			 * - For block ciphers in CTR mode, this is the counter.
@@ -451,8 +475,8 @@ struct rte_crypto_sym_op {
 			uint16_t length;
 			/**< Length of valid IV data.
 			 *
-			 * - For block ciphers in CBC or F8 mode, or for Kasumi
-			 * in F8 mode, or for SNOW3G in UEA2 mode, this is the
+			 * - For block ciphers in CBC or F8 mode, or for KASUMI
+			 * in F8 mode, or for SNOW 3G in UEA2 mode, this is the
 			 * length of the IV (which must be the same as the
 			 * block length of the cipher).
 			 *
@@ -482,12 +506,14 @@ struct rte_crypto_sym_op {
 			  * should be set instead.
 			  *
 			  * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC)
-			  * mode of operation, this field specifies the start
-			  * of the AAD data in the source buffer.
+			  * mode of operation, this field is set to 0. The AAD
+			  * data pointer of the rte_crypto_sym_op_data
+			  * structure is used instead.
 			  *
 			  * @note
-			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
-			  * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+			  * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+			  * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+			  * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
 			  * this field should be in bits.
 			  */
 
@@ -502,12 +528,13 @@ struct rte_crypto_sym_op {
 			  *
 			  * @note
 			  * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode
-			  * of operation, this field specifies the length of
-			  * the AAD data in the source buffer.
+			  * of operation, this field is set to 0.
+			  * Auth.aad.length is used instead.
 			  *
 			  * @note
-			  * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2
-			  * and KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9,
+			  * For SNOW 3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+			  * KASUMI @ RTE_CRYPTO_AUTH_KASUMI_F9
+			  * and ZUC @ RTE_CRYPTO_AUTH_ZUC_EIA3,
 			  * this field should be in bits.
 			  */
 		} data; /**< Data offsets and length for authentication */
@@ -551,7 +578,7 @@ struct rte_crypto_sym_op {
 			uint8_t *data;
 			/**< Pointer to Additional Authenticated Data (AAD)
 			 * needed for authenticated cipher mechanisms (CCM and
-			 * GCM), and to the IV for SNOW3G authentication
+			 * GCM), and to the IV for SNOW 3G authentication
 			 * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other
 			 * authentication mechanisms this pointer is ignored.
 			 *
@@ -589,9 +616,7 @@ struct rte_crypto_sym_op {
 			 *
 			 * @note
 			 * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of
-			 * operation, this field is not used and should be set
-			 * to 0. Instead the AAD data should be placed in the
-			 * source buffer.
+			 * operation, this field is used to pass plaintext.
 			 */
 			phys_addr_t phys_addr;	/**< physical address */
 			uint16_t length;	/**< Length of digest */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index fc4123b6..127e8d0d 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -59,7 +59,6 @@
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -319,7 +318,7 @@ rte_cryptodev_find_free_device_index(void)
 }
 
 struct rte_cryptodev *
-rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
+rte_cryptodev_pmd_allocate(const char *name, int socket_id)
 {
 	struct rte_cryptodev *cryptodev;
 	uint8_t dev_id;
@@ -358,7 +357,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
 		cryptodev->data->dev_started = 0;
 
 		cryptodev->attached = RTE_CRYPTODEV_ATTACHED;
-		cryptodev->pmd_type = type;
 
 		cryptodev_globals.nb_devs++;
 	}
@@ -366,23 +364,6 @@ rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id)
 	return cryptodev;
 }
 
-static inline int
-rte_cryptodev_create_unique_device_name(char *name, size_t size,
-		struct rte_pci_device *pci_dev)
-{
-	int ret;
-
-	if ((name == NULL) || (pci_dev == NULL))
-		return -EINVAL;
-
-	ret = snprintf(name, size, "%d:%d.%d",
-			pci_dev->addr.bus, pci_dev->addr.devid,
-			pci_dev->addr.function);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 int
 rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev)
 {
@@ -407,7 +388,7 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
 	struct rte_cryptodev *cryptodev;
 
 	/* allocate device structure */
-	cryptodev = rte_cryptodev_pmd_allocate(name, PMD_VDEV, socket_id);
+	cryptodev = rte_cryptodev_pmd_allocate(name, socket_id);
 	if (cryptodev == NULL)
 		return NULL;
 
@@ -430,9 +411,9 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
 	return cryptodev;
 }
 
-static int
-rte_cryptodev_init(struct rte_pci_driver *pci_drv,
-		struct rte_pci_device *pci_dev)
+int
+rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
+			struct rte_pci_device *pci_dev)
 {
 	struct rte_cryptodev_driver *cryptodrv;
 	struct rte_cryptodev *cryptodev;
@@ -445,12 +426,10 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
 	if (cryptodrv == NULL)
 		return -ENODEV;
 
-	/* Create unique Crypto device name using PCI address */
-	rte_cryptodev_create_unique_device_name(cryptodev_name,
-			sizeof(cryptodev_name), pci_dev);
+	rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name,
+			sizeof(cryptodev_name));
 
-	cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, PMD_PDEV,
-			rte_socket_id());
+	cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
 	if (cryptodev == NULL)
 		return -ENOMEM;
 
@@ -479,7 +458,7 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
 		return 0;
 
 	CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)"
-			" failed", pci_drv->name,
+			" failed", pci_drv->driver.name,
 			(unsigned) pci_dev->id.vendor_id,
 			(unsigned) pci_dev->id.device_id);
 
@@ -492,8 +471,8 @@ rte_cryptodev_init(struct rte_pci_driver *pci_drv,
 	return -ENXIO;
 }
 
-static int
-rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
+int
+rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev)
 {
 	const struct rte_cryptodev_driver *cryptodrv;
 	struct rte_cryptodev *cryptodev;
@@ -503,9 +482,8 @@ rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
 	if (pci_dev == NULL)
 		return -EINVAL;
 
-	/* Create unique device name using PCI address */
-	rte_cryptodev_create_unique_device_name(cryptodev_name,
-			sizeof(cryptodev_name), pci_dev);
+	rte_eal_pci_device_name(&pci_dev->addr, cryptodev_name,
+			sizeof(cryptodev_name));
 
 	cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name);
 	if (cryptodev == NULL)
@@ -535,28 +513,6 @@ rte_cryptodev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
-int
-rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *cryptodrv,
-		enum pmd_type type)
-{
-	/* Call crypto device initialization directly if device is virtual */
-	if (type == PMD_VDEV)
-		return rte_cryptodev_init((struct rte_pci_driver *)cryptodrv,
-				NULL);
-
-	/*
-	 * Register PCI driver for physical device intialisation during
-	 * PCI probing
-	 */
-	cryptodrv->pci_drv.devinit = rte_cryptodev_init;
-	cryptodrv->pci_drv.devuninit = rte_cryptodev_uninit;
-
-	rte_eal_pci_register(&cryptodrv->pci_drv);
-
-	return 0;
-}
-
-
 uint16_t
 rte_cryptodev_queue_pair_count(uint8_t dev_id)
 {
@@ -913,7 +869,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
 
 	dev_info->pci_dev = dev->pci_dev;
 	if (dev->driver)
-		dev_info->driver_name = dev->driver->pci_drv.name;
+		dev_info->driver_name = dev->driver->pci_drv.driver.name;
 }
 
 
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index affbdecc..8f63e8f6 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -48,18 +48,23 @@ extern "C" {
 #include "rte_kvargs.h"
 #include "rte_crypto.h"
 #include "rte_dev.h"
+#include <rte_common.h>
 
-#define CRYPTODEV_NAME_NULL_PMD		cryptodev_null_pmd
+#define CRYPTODEV_NAME_NULL_PMD		crypto_null
 /**< Null crypto PMD device name */
-#define CRYPTODEV_NAME_AESNI_MB_PMD	cryptodev_aesni_mb_pmd
+#define CRYPTODEV_NAME_AESNI_MB_PMD	crypto_aesni_mb
 /**< AES-NI Multi buffer PMD device name */
-#define CRYPTODEV_NAME_AESNI_GCM_PMD	cryptodev_aesni_gcm_pmd
+#define CRYPTODEV_NAME_AESNI_GCM_PMD	crypto_aesni_gcm
 /**< AES-NI GCM PMD device name */
-#define CRYPTODEV_NAME_QAT_SYM_PMD	cryptodev_qat_sym_pmd
+#define CRYPTODEV_NAME_OPENSSL_PMD	crypto_openssl
+/**< OpenSSL Crypto PMD device name */
+#define CRYPTODEV_NAME_QAT_SYM_PMD	crypto_qat
 /**< Intel QAT Symmetric Crypto PMD device name */
-#define CRYPTODEV_NAME_SNOW3G_PMD	cryptodev_snow3g_pmd
+#define CRYPTODEV_NAME_SNOW3G_PMD	crypto_snow3g
 /**< SNOW 3G PMD device name */
-#define CRYPTODEV_NAME_KASUMI_PMD	cryptodev_kasumi_pmd
+#define CRYPTODEV_NAME_KASUMI_PMD	crypto_kasumi
+/**< KASUMI PMD device name */
+#define CRYPTODEV_NAME_ZUC_PMD		crypto_zuc
-/**< KASUMI PMD device name */
+/**< ZUC PMD device name */
 
 /** Crypto device type */
@@ -70,32 +75,38 @@ enum rte_cryptodev_type {
 	RTE_CRYPTODEV_QAT_SYM_PMD,	/**< QAT PMD Symmetric Crypto */
 	RTE_CRYPTODEV_SNOW3G_PMD,	/**< SNOW 3G PMD */
 	RTE_CRYPTODEV_KASUMI_PMD,	/**< KASUMI PMD */
+	RTE_CRYPTODEV_ZUC_PMD,		/**< ZUC PMD */
+	RTE_CRYPTODEV_OPENSSL_PMD,    /**<  OpenSSL PMD */
 };
 
 extern const char **rte_cyptodev_names;
 
 /* Logging Macros */
 
-#define CDEV_LOG_ERR(fmt, args...)					\
-		RTE_LOG(ERR, CRYPTODEV, "%s() line %u: " fmt "\n",	\
-				__func__, __LINE__, ## args)
+#define CDEV_LOG_ERR(...) \
+	RTE_LOG(ERR, CRYPTODEV, \
+		RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
 
-#define CDEV_PMD_LOG_ERR(dev, fmt, args...)				\
-		RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \
-				dev, __func__, __LINE__, ## args)
+#define CDEV_PMD_LOG_ERR(dev, ...) \
+	RTE_LOG(ERR, CRYPTODEV, \
+		RTE_FMT("[%s] %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			dev, __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
 
 #ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
-#define CDEV_LOG_DEBUG(fmt, args...)					\
-		RTE_LOG(DEBUG, CRYPTODEV, "%s() line %u: " fmt "\n",	\
-				__func__, __LINE__, ## args)		\
+#define CDEV_LOG_DEBUG(...) \
+	RTE_LOG(DEBUG, CRYPTODEV, \
+		RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			__func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))
 
-#define CDEV_PMD_TRACE(fmt, args...)					\
-		RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s: " fmt "\n",		\
-				dev, __func__, ## args)
+#define CDEV_PMD_TRACE(...) \
+	RTE_LOG(DEBUG, CRYPTODEV, \
+		RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
+			dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,)))
 
 #else
-#define CDEV_LOG_DEBUG(fmt, args...)
-#define CDEV_PMD_TRACE(fmt, args...)
+#define CDEV_LOG_DEBUG(...) (void)0
+#define CDEV_PMD_TRACE(...) (void)0
 #endif
 
 /**
@@ -104,6 +115,7 @@ extern const char **rte_cyptodev_names;
 struct rte_cryptodev_symmetric_capability {
 	enum rte_crypto_sym_xform_type xform_type;
 	/**< Transform type : Authentication / Cipher */
+	RTE_STD_C11
 	union {
 		struct {
 			enum rte_crypto_auth_algorithm algo;
@@ -177,6 +189,7 @@ struct rte_cryptodev_capabilities {
 	enum rte_crypto_op_type op;
 	/**< Operation type */
 
+	RTE_STD_C11
 	union {
 		struct rte_cryptodev_symmetric_capability sym;
 		/**< Symmetric operation capability parameters */
@@ -613,12 +626,11 @@ struct rte_cryptodev {
 
 	enum rte_cryptodev_type dev_type;
 	/**< Crypto device type */
-	enum pmd_type pmd_type;
-	/**< PMD type - PDEV / VDEV */
 
 	struct rte_cryptodev_cb_list link_intr_cbs;
 	/**< User application callback for interrupts if present */
 
+	__extension__
 	uint8_t attached : 1;
 	/**< Flag indicating the device is attached */
 } __rte_cache_aligned;
@@ -642,6 +654,7 @@ struct rte_cryptodev_data {
 	char name[RTE_CRYPTODEV_NAME_MAX_LEN];
 	/**< Unique identifier name */
 
+	__extension__
 	uint8_t dev_started : 1;
 	/**< Device state: STARTED(1)/STOPPED(0) */
 
@@ -749,6 +762,7 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
 
 /** Cryptodev symmetric crypto session */
 struct rte_cryptodev_sym_session {
+	RTE_STD_C11
 	struct {
 		uint8_t dev_id;
 		/**< Device Id */
@@ -759,7 +773,7 @@ struct rte_cryptodev_sym_session {
 	} __rte_aligned(8);
 	/**< Public symmetric session details */
 
-	char _private[0];
+	__extension__ char _private[0];
 	/**< Private session material */
 };
 
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 7d049ea3..abfe2dc1 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -52,6 +52,7 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_log.h>
+#include <rte_common.h>
 
 #include "rte_crypto.h"
 #include "rte_cryptodev.h"
@@ -61,17 +62,18 @@ extern "C" {
 #define RTE_PMD_DEBUG_TRACE(...) \
 	rte_pmd_debug_trace(__func__, __VA_ARGS__)
 #else
-#define RTE_PMD_DEBUG_TRACE(fmt, args...)
+#define RTE_PMD_DEBUG_TRACE(...)
 #endif
 
 struct rte_cryptodev_session {
+	RTE_STD_C11
 	struct {
 		uint8_t dev_id;
 		enum rte_cryptodev_type type;
 		struct rte_mempool *mp;
 	} __rte_aligned(8);
 
-	char _private[0];
+	__extension__ char _private[0];
 };
 
 struct rte_cryptodev_driver;
@@ -454,13 +456,12 @@ struct rte_cryptodev_ops {
  * to that slot for the driver to use.
  *
  * @param	name		Unique identifier name for each device
- * @param	type		Device type of this Crypto device
  * @param	socket_id	Socket to allocate resources on.
  * @return
  *   - Slot in the rte_dev_devices array for a new device;
  */
 struct rte_cryptodev *
-rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id);
+rte_cryptodev_pmd_allocate(const char *name, int socket_id);
 
 /**
  * Creates a new virtual crypto device and returns the pointer
@@ -492,36 +493,6 @@ rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size,
 extern int
 rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev);
 
-
-/**
- * Register a Crypto [Poll Mode] driver.
- *
- * Function invoked by the initialization function of a Crypto driver
- * to simultaneously register itself as Crypto Poll Mode Driver and to either:
- *
- *	a - register itself as PCI driver if the crypto device is a physical
- *		device, by invoking the rte_eal_pci_register() function to
- *		register the *pci_drv* structure embedded in the *crypto_drv*
- *		structure, after having stored the address of the
- *		rte_cryptodev_init() function in the *devinit* field of the
- *		*pci_drv* structure.
- *
- *		During the PCI probing phase, the rte_cryptodev_init()
- *		function is invoked for each PCI [device] matching the
- *		embedded PCI identifiers provided by the driver.
- *
- *	b, complete the initialization sequence if the device is a virtual
- *		device by calling the rte_cryptodev_init() directly passing a
- *		NULL parameter for the rte_pci_device structure.
- *
- *   @param crypto_drv	crypto_driver structure associated with the crypto
- *					driver.
- *   @param type		pmd type
- */
-extern int
-rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv,
-		enum pmd_type type);
-
 /**
  * Executes all the user application registered callbacks for the specific
  * device.
@@ -535,6 +506,18 @@ rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv,
 void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev,
 				enum rte_cryptodev_event_type event);
 
+/**
+ * Wrapper for use by pci drivers as a .probe function to attach to a crypto
+ * interface.
+ */
+int rte_cryptodev_pci_probe(struct rte_pci_driver *pci_drv,
+			    struct rte_pci_device *pci_dev);
+
+/**
+ * Wrapper for use by pci drivers as a .remove function to detach a crypto
+ * interface.
+ */
+int rte_cryptodev_pci_remove(struct rte_pci_device *pci_dev);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index a08fd202..9dde0e72 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -14,7 +14,6 @@ DPDK_16.04 {
 	rte_cryptodev_info_get;
 	rte_cryptodev_pmd_allocate;
 	rte_cryptodev_pmd_callback_process;
-	rte_cryptodev_pmd_driver_register;
 	rte_cryptodev_pmd_release_device;
 	rte_cryptodev_pmd_virtual_dev_init;
 	rte_cryptodev_sym_session_create;
@@ -39,3 +38,11 @@ DPDK_16.07 {
 	rte_cryptodev_parse_vdev_init_params;
 
 } DPDK_16.04;
+
+DPDK_16.11 {
+	global:
+
+	rte_cryptodev_pci_probe;
+	rte_cryptodev_pci_remove;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 988cbbce..a15b762b 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -48,14 +48,13 @@ LDLIBS += -lgcc_s
 
 EXPORT_MAP := rte_eal_version.map
 
-LIBABIVER := 2
+LIBABIVER := 3
 
-# specific to linuxapp exec-env
+# specific to bsdapp exec-env
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
@@ -69,6 +68,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_vdev.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index a0c8f8c8..35e3117a 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -496,14 +496,14 @@ rte_eal_init(int argc, char **argv)
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 
+	/* checks if the machine is adequate */
+	rte_cpu_check_supported();
+
 	if (!rte_atomic32_test_and_set(&run_once))
 		return -1;
 
 	thread_id = pthread_self();
 
-	if (rte_eal_log_early_init() < 0)
-		rte_panic("Cannot init early logs\n");
-
 	eal_log_level_parse(argc, argv);
 
 	/* set log level as early as possible */
@@ -552,9 +552,6 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_tailqs_init() < 0)
 		rte_panic("Cannot init tail queues for objects\n");
 
-/*	if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
-		rte_panic("Cannot init logs\n");*/
-
 	if (rte_eal_alarm_init() < 0)
 		rte_panic("Cannot init interrupt-handling thread\n");
 
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 374b68f2..8b3ed881 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -287,7 +287,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
 	dev->max_vfs = 0;
 
 	/* FreeBSD has no NUMA support (yet) */
-	dev->numa_node = 0;
+	dev->device.numa_node = 0;
 
 	/* FreeBSD has only one pass through driver */
 	dev->kdrv = RTE_KDRV_NIC_UIO;
@@ -406,6 +406,55 @@ error:
 	return -1;
 }
 
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+	int fd;
+	struct pci_conf matches[2];
+	struct pci_match_conf match = {
+		.pc_sel = {
+			.pc_domain = addr->domain,
+			.pc_bus = addr->bus,
+			.pc_dev = addr->devid,
+			.pc_func = addr->function,
+		},
+	};
+	struct pci_conf_io conf_io = {
+		.pat_buf_len = 0,
+		.num_patterns = 1,
+		.patterns = &match,
+		.match_buf_len = sizeof(matches),
+		.matches = &matches[0],
+	};
+
+	fd = open("/dev/pci", O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
+		goto error;
+	}
+
+	if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
+		RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
+				__func__, strerror(errno));
+		goto error;
+	}
+
+	if (conf_io.num_matches != 1)
+		goto error;
+
+	if (pci_scan_one(fd, &matches[0]) < 0)
+		goto error;
+
+	close(fd);
+
+	return 0;
+
+error:
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
 /* Read PCI config space. */
 int rte_eal_pci_read_config(const struct rte_pci_device *dev,
 			    void *buf, size_t len, off_t offset)
@@ -623,9 +672,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
 int
 rte_eal_pci_init(void)
 {
-	TAILQ_INIT(&pci_driver_list);
-	TAILQ_INIT(&pci_device_list);
-
 	/* for debug purposes, PCI can be disabled */
 	if (internal_config.no_pci)
 		return 0;
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index a335e04b..2f81f7c0 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -162,3 +162,15 @@ DPDK_16.07 {
 	rte_thread_setname;
 
 } DPDK_16.04;
+
+DPDK_16.11 {
+	global:
+
+	rte_delay_us_block;
+	rte_delay_us_callback_register;
+	rte_eal_dev_attach;
+	rte_eal_dev_detach;
+	rte_eal_vdrv_register;
+	rte_eal_vdrv_unregister;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index f5ea0ee8..dfd64aa5 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -34,11 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
 INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
-INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
+INC += rte_per_lcore.h rte_random.h
 INC += rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_dev.h rte_vdev.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
 INC += rte_malloc.h rte_keepalive.h rte_time.h
 
diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c
index ecb12409..b5f76f7f 100644
--- a/lib/librte_eal/common/eal_common_cpuflags.c
+++ b/lib/librte_eal/common/eal_common_cpuflags.c
@@ -39,14 +39,8 @@
 /**
  * Checks if the machine is adequate for running the binary. If it is not, the
  * program exits with status 1.
- * The function attribute forces this function to be called before main(). But
- * with ICC, the check is generated by the compiler.
  */
-#ifndef __INTEL_COMPILER
-void __attribute__ ((__constructor__))
-#else
 void
-#endif
 rte_cpu_check_supported(void)
 {
 	/* This is generated at compile-time by the build system */
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index a8a4146c..4f3b4934 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -48,6 +48,9 @@
 /** Global list of device drivers. */
 static struct rte_driver_list dev_driver_list =
 	TAILQ_HEAD_INITIALIZER(dev_driver_list);
+/** Global list of devices. */
+static struct rte_device_list dev_device_list =
+	TAILQ_HEAD_INITIALIZER(dev_device_list);
 
 /* register a driver */
 void
@@ -63,42 +66,25 @@ rte_eal_driver_unregister(struct rte_driver *driver)
 	TAILQ_REMOVE(&dev_driver_list, driver, next);
 }
 
-int
-rte_eal_vdev_init(const char *name, const char *args)
+void rte_eal_device_insert(struct rte_device *dev)
 {
-	struct rte_driver *driver;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	TAILQ_FOREACH(driver, &dev_driver_list, next) {
-		if (driver->type != PMD_VDEV)
-			continue;
-
-		/*
-		 * search a driver prefix in virtual device name.
-		 * For example, if the driver is pcap PMD, driver->name
-		 * will be "eth_pcap", but "name" will be "eth_pcapN".
-		 * So use strncmp to compare.
-		 */
-		if (!strncmp(driver->name, name, strlen(driver->name)))
-			return driver->init(name, args);
-	}
+	TAILQ_INSERT_TAIL(&dev_device_list, dev, next);
+}
 
-	RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
-	return -EINVAL;
+void rte_eal_device_remove(struct rte_device *dev)
+{
+	TAILQ_REMOVE(&dev_device_list, dev, next);
 }
 
 int
 rte_eal_dev_init(void)
 {
 	struct rte_devargs *devargs;
-	struct rte_driver *driver;
 
 	/*
 	 * Note that the dev_driver_list is populated here
 	 * from calls made to rte_eal_driver_register from constructor functions
-	 * embedded into PMD modules via the PMD_REGISTER_DRIVER macro
+	 * embedded into PMD modules via the RTE_PMD_REGISTER_VDEV macro
 	 */
 
 	/* call the init function for each virtual device */
@@ -115,38 +101,53 @@ rte_eal_dev_init(void)
 		}
 	}
 
-	/* Once the vdevs are initalized, start calling all the pdev drivers */
-	TAILQ_FOREACH(driver, &dev_driver_list, next) {
-		if (driver->type != PMD_PDEV)
-			continue;
-		/* PDEV drivers don't get passed any parameters */
-		driver->init(NULL, NULL);
-	}
 	return 0;
 }
 
-int
-rte_eal_vdev_uninit(const char *name)
+int rte_eal_dev_attach(const char *name, const char *devargs)
 {
-	struct rte_driver *driver;
+	struct rte_pci_addr addr;
 
-	if (name == NULL)
+	if (name == NULL || devargs == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
 		return -EINVAL;
+	}
 
-	TAILQ_FOREACH(driver, &dev_driver_list, next) {
-		if (driver->type != PMD_VDEV)
-			continue;
+	if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+		if (rte_eal_pci_probe_one(&addr) < 0)
+			goto err;
+
+	} else {
+		if (rte_eal_vdev_init(name, devargs))
+			goto err;
+	}
+
+	return 0;
+
+err:
+	RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", name);
+	return -EINVAL;
+}
+
+int rte_eal_dev_detach(const char *name)
+{
+	struct rte_pci_addr addr;
 
-		/*
-		 * search a driver prefix in virtual device name.
-		 * For example, if the driver is pcap PMD, driver->name
-		 * will be "eth_pcap", but "name" will be "eth_pcapN".
-		 * So use strncmp to compare.
-		 */
-		if (!strncmp(driver->name, name, strlen(driver->name)))
-			return driver->uninit(name);
+	if (name == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid device provided.\n");
+		return -EINVAL;
 	}
 
-	RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+	if (eal_parse_pci_DomBDF(name, &addr) == 0) {
+		if (rte_eal_pci_detach(&addr) < 0)
+			goto err;
+	} else {
+		if (rte_eal_vdev_uninit(name))
+			goto err;
+	}
+	return 0;
+
+err:
+	RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", name);
 	return -EINVAL;
 }
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 7916c781..e45d3269 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -48,11 +48,12 @@ struct rte_logs rte_logs = {
 	.file = NULL,
 };
 
+/* Stream to use for logging if rte_logs.file is NULL */
 static FILE *default_log_stream;
 
 /**
  * This global structure stores some informations about the message
- * that is currently beeing processed by one lcore
+ * that is currently being processed by one lcore
  */
 struct log_cur_msg {
 	uint32_t loglevel; /**< log level - see rte_log.h */
@@ -64,27 +65,11 @@ static RTE_DEFINE_PER_LCORE(struct log_cur_msg, log_cur_msg);
 
 /* default logs */
 
-int
-rte_log_add_in_history(const char *buf __rte_unused, size_t size __rte_unused)
-{
-	return 0;
-}
-
-void
-rte_log_set_history(int enable)
-{
-	if (enable)
-		RTE_LOG(WARNING, EAL, "The log history is deprecated.\n");
-}
-
 /* Change the stream that will be used by logging system */
 int
 rte_openlog_stream(FILE *f)
 {
-	if (f == NULL)
-		rte_logs.file = default_log_stream;
-	else
-		rte_logs.file = f;
+	rte_logs.file = f;
 	return 0;
 }
 
@@ -131,12 +116,6 @@ int rte_log_cur_msg_logtype(void)
 	return RTE_PER_LCORE(log_cur_msg).logtype;
 }
 
-/* Dump log history to file */
-void
-rte_log_dump_history(FILE *out __rte_unused)
-{
-}
-
 /*
  * Generates a log message The message will be sent in the stream
  * defined by the previous call to rte_openlog_stream().
@@ -146,6 +125,19 @@ rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
 {
 	int ret;
 	FILE *f = rte_logs.file;
+	if (f == NULL) {
+		f = default_log_stream;
+		if (f == NULL) {
+			/*
+			 * Grab the current value of stderr here, rather than
+			 * just initializing default_log_stream to stderr. This
+			 * ensures that we will always use the current value
+			 * of stderr, even if the application closes and
+			 * reopens it.
+			 */
+			f = stderr;
+		}
+	}
 
 	if ((level > rte_logs.level) || !(logtype & rte_logs.type))
 		return 0;
@@ -177,17 +169,14 @@ rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
 }
 
 /*
- * called by environment-specific log init function
+ * Called by environment-specific initialization functions.
  */
-int
-rte_eal_common_log_init(FILE *default_log)
+void
+eal_log_set_default(FILE *default_log)
 {
 	default_log_stream = default_log;
-	rte_openlog_stream(default_log);
 
 #if RTE_LOG_LEVEL >= RTE_LOG_DEBUG
 	RTE_LOG(NOTICE, EAL, "Debug logs available - lower performance\n");
 #endif
-
-	return 0;
 }
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 1bd0a33d..64f4e0ad 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -337,19 +337,7 @@ rte_memzone_free(const struct rte_memzone *mz)
 	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
 	idx = idx / sizeof(struct rte_memzone);
 
-#ifdef RTE_LIBRTE_IVSHMEM
-	/*
-	 * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot
-	 * free it.
-	 */
-	if (mcfg->memzone[idx].ioremap_addr != 0) {
-		rte_rwlock_write_unlock(&mcfg->mlock);
-		return -EINVAL;
-	}
-#endif
-
 	addr = mcfg->memzone[idx].addr;
-
 	if (addr == NULL)
 		ret = -EINVAL;
 	else if (mcfg->memzone_cnt == 0) {
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 1a1bab36..6ca8af17 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -1021,7 +1021,7 @@ eal_common_usage(void)
 	       "                      [NOTE: PCI whitelist cannot be used with -b option]\n"
 	       "  --"OPT_VDEV"              Add a virtual device.\n"
 	       "                      The argument format is <driver><id>[,key=val,...]\n"
-	       "                      (ex: --vdev=eth_pcap0,iface=eth2).\n"
+	       "                      (ex: --vdev=net_pcap0,iface=eth2).\n"
 	       "  -d LIB.so|DIR       Add a driver or driver directory\n"
 	       "                      (can be used multiple times)\n"
 	       "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 096c65e4..6bff6752 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -82,8 +82,10 @@
 
 #include "eal_private.h"
 
-struct pci_driver_list pci_driver_list;
-struct pci_device_list pci_device_list;
+struct pci_driver_list pci_driver_list =
+	TAILQ_HEAD_INITIALIZER(pci_driver_list);
+struct pci_device_list pci_device_list =
+	TAILQ_HEAD_INITIALIZER(pci_device_list);
 
 #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
 
@@ -151,7 +153,7 @@ pci_unmap_resource(void *requested_addr, size_t size)
 }
 
 /*
- * If vendor/device ID match, call the devinit() function of the
+ * If vendor/device ID match, call the probe() function of the
  * driver.
  */
 static int
@@ -183,17 +185,18 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
 
 		RTE_LOG(INFO, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
 				loc->domain, loc->bus, loc->devid, loc->function,
-				dev->numa_node);
+				dev->device.numa_node);
 
 		/* no initialization when blacklisted, return without error */
-		if (dev->devargs != NULL &&
-			dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) {
+		if (dev->device.devargs != NULL &&
+			dev->device.devargs->type ==
+				RTE_DEVTYPE_BLACKLISTED_PCI) {
 			RTE_LOG(INFO, EAL, "  Device is blacklisted, not initializing\n");
 			return 1;
 		}
 
 		RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
-				dev->id.device_id, dr->name);
+				dev->id.device_id, dr->driver.name);
 
 		if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
 			/* map resources for devices that use igb_uio */
@@ -210,15 +213,19 @@ rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *d
 		/* reference driver structure */
 		dev->driver = dr;
 
-		/* call the driver devinit() function */
-		return dr->devinit(dr, dev);
+		/* call the driver probe() function */
+		ret = dr->probe(dr, dev);
+		if (ret)
+			dev->driver = NULL;
+
+		return ret;
 	}
 	/* return positive value if driver doesn't support this device */
 	return 1;
 }
 
 /*
- * If vendor/device ID match, call the devuninit() function of the
+ * If vendor/device ID match, call the remove() function of the
  * driver.
  */
 static int
@@ -250,12 +257,12 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
 
 		RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n",
 				loc->domain, loc->bus, loc->devid,
-				loc->function, dev->numa_node);
+				loc->function, dev->device.numa_node);
 
 		RTE_LOG(DEBUG, EAL, "  remove driver: %x:%x %s\n", dev->id.vendor_id,
-				dev->id.device_id, dr->name);
+				dev->id.device_id, dr->driver.name);
 
-		if (dr->devuninit && (dr->devuninit(dev) < 0))
+		if (dr->remove && (dr->remove(dev) < 0))
 			return -1;	/* negative value is an error */
 
 		/* clear driver structure */
@@ -273,7 +280,7 @@ rte_eal_pci_detach_dev(struct rte_pci_driver *dr,
 }
 
 /*
- * If vendor/device ID match, call the devinit() function of all
+ * If vendor/device ID match, call the probe() function of all
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
@@ -286,6 +293,10 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
 	if (dev == NULL)
 		return -1;
 
+	/* Check if a driver is already loaded */
+	if (dev->driver != NULL)
+		return 0;
+
 	TAILQ_FOREACH(dr, &pci_driver_list, next) {
 		rc = rte_eal_pci_probe_one_driver(dr, dev);
 		if (rc < 0)
@@ -300,7 +311,7 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
 }
 
 /*
- * If vendor/device ID match, call the devuninit() function of all
+ * If vendor/device ID match, call the remove() function of all
  * registered driver for the given device. Return -1 if initialization
  * failed, return 1 if no driver is found for this device.
  */
@@ -339,6 +350,12 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
 	if (addr == NULL)
 		return -1;
 
+	/* update current pci device in global list, kernel bindings might have
+	 * changed since last time we looked at it.
+	 */
+	if (pci_update_device(addr) < 0)
+		goto err_return;
+
 	TAILQ_FOREACH(dev, &pci_device_list, next) {
 		if (rte_eal_compare_pci_addr(&dev->addr, addr))
 			continue;
@@ -351,9 +368,9 @@ rte_eal_pci_probe_one(const struct rte_pci_addr *addr)
 	return -1;
 
 err_return:
-	RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
-			" cannot be used\n", dev->addr.domain, dev->addr.bus,
-			dev->addr.devid, dev->addr.function);
+	RTE_LOG(WARNING, EAL,
+		"Requested device " PCI_PRI_FMT " cannot be used\n",
+		addr->domain, addr->bus, addr->devid, addr->function);
 	return -1;
 }
 
@@ -391,7 +408,7 @@ err_return:
 }
 
 /*
- * Scan the content of the PCI bus, and call the devinit() function for
+ * Scan the content of the PCI bus, and call the probe() function for
  * all registered drivers that have a matching entry in its id_table
  * for discovered devices.
  */
@@ -411,7 +428,7 @@ rte_eal_pci_probe(void)
 		/* set devargs in PCI structure */
 		devargs = pci_devargs_lookup(dev);
 		if (devargs != NULL)
-			dev->devargs = devargs;
+			dev->device.devargs = devargs;
 
 		/* probe all or only whitelisted devices */
 		if (probe_all)
@@ -464,11 +481,13 @@ void
 rte_eal_pci_register(struct rte_pci_driver *driver)
 {
 	TAILQ_INSERT_TAIL(&pci_driver_list, driver, next);
+	rte_eal_driver_register(&driver->driver);
 }
 
 /* unregister a driver */
 void
 rte_eal_pci_unregister(struct rte_pci_driver *driver)
 {
+	rte_eal_driver_unregister(&driver->driver);
 	TAILQ_REMOVE(&pci_driver_list, driver, next);
 }
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index c4227cd8..72656176 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -47,8 +47,11 @@
 /* The frequency of the RDTSC timer resolution */
 static uint64_t eal_tsc_resolution_hz;
 
+/* Pointer to user delay function */
+void (*rte_delay_us)(unsigned int) = NULL;
+
 void
-rte_delay_us(unsigned us)
+rte_delay_us_block(unsigned int us)
 {
 	const uint64_t start = rte_get_timer_cycles();
 	const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6;
@@ -84,3 +87,20 @@ set_tsc_freq(void)
 	RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000);
 	eal_tsc_resolution_hz = freq;
 }
+
+/*
+ * Install userfunc as the function called through rte_delay_us.
+ * A NULL userfunc selects rte_delay_us_block instead, so registering
+ * NULL cannot leave rte_delay_us as an uncallable NULL pointer.
+ */
+void rte_delay_us_callback_register(void (*userfunc)(unsigned int))
+{
+	rte_delay_us = (userfunc != NULL) ? userfunc : rte_delay_us_block;
+}
+
+/* Constructor: make rte_delay_us_block the default delay function. */
+static void __attribute__((constructor))
+rte_timer_init(void)
+{
+	rte_delay_us_callback_register(rte_delay_us_block);
+}
diff --git a/lib/librte_eal/common/eal_common_vdev.c b/lib/librte_eal/common/eal_common_vdev.c
new file mode 100644
index 00000000..0ff2377d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_vdev.c
@@ -0,0 +1,145 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_vdev.h>
+#include <rte_common.h>
+
+struct vdev_driver_list vdev_driver_list =
+	TAILQ_HEAD_INITIALIZER(vdev_driver_list);
+
+/*
+ * Register a driver: append it to vdev_driver_list, then pass its embedded
+ * generic driver to rte_eal_driver_register().
+ */
+void
+rte_eal_vdrv_register(struct rte_vdev_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
+	rte_eal_driver_register(&driver->driver);
+}
+
+/*
+ * Unregister a driver: undo rte_eal_vdrv_register() in the opposite order.
+ */
+void
+rte_eal_vdrv_unregister(struct rte_vdev_driver *driver)
+{
+	rte_eal_driver_unregister(&driver->driver);
+	TAILQ_REMOVE(&vdev_driver_list, driver, next);
+}
+
+/*
+ * Find the registered driver that handles the virtual device "name".
+ *
+ * A device name is a driver name plus an instance suffix: for the pcap
+ * PMD driver->driver.name is "net_pcap" while "name" is "net_pcapN", so
+ * only the leading strlen(driver name) characters are compared.
+ *
+ * Driver names are tried before aliases so that new names take precedence
+ * over aliases.  Returns NULL when nothing matches.
+ */
+static struct rte_vdev_driver *
+vdev_find_driver(const char *name)
+{
+	struct rte_vdev_driver *driver;
+
+	TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+		if (!strncmp(driver->driver.name, name,
+			     strlen(driver->driver.name)))
+			return driver;
+	}
+
+	TAILQ_FOREACH(driver, &vdev_driver_list, next) {
+		if (driver->driver.alias &&
+		    !strncmp(driver->driver.alias, name,
+			     strlen(driver->driver.alias)))
+			return driver;
+	}
+
+	return NULL;
+}
+
+/*
+ * Initialise the virtual device "name" by calling the probe() callback of
+ * the matching driver with the device arguments "args".
+ *
+ * Returns -EINVAL if "name" is NULL or no driver matches, otherwise the
+ * value returned by probe().
+ */
+int
+rte_eal_vdev_init(const char *name, const char *args)
+{
+	struct rte_vdev_driver *driver;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	driver = vdev_find_driver(name);
+	if (driver == NULL) {
+		RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+		return -EINVAL;
+	}
+
+	return driver->probe(name, args);
+}
+
+/*
+ * Uninitialise the virtual device "name" by calling the remove() callback
+ * of the matching driver.  The lookup also covers aliases, so a device
+ * initialised under an alias name can be removed again.
+ *
+ * Returns -EINVAL if "name" is NULL or no driver matches, otherwise the
+ * value returned by remove().
+ */
+int
+rte_eal_vdev_uninit(const char *name)
+{
+	struct rte_vdev_driver *driver;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	driver = vdev_find_driver(name);
+	if (driver == NULL) {
+		RTE_LOG(ERR, EAL, "no driver found for %s\n", name);
+		return -EINVAL;
+	}
+
+	return driver->remove(name);
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index fdb4a70b..8acbd996 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -97,17 +97,6 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
 	return buffer;
 }
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-static inline const char *
-eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
-{
-	snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
-			internal_config.hugefile_prefix, f_id);
-	buffer[buflen - 1] = '\0';
-	return buffer;
-}
-#endif
-
 /** define the default filename prefix for the %s values above */
 #define HUGEFILE_PREFIX_DEFAULT "rte"
 
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index 38edac03..68369f26 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -52,9 +52,6 @@ struct hugepage_file {
 	int socket_id;      /**< NUMA socket ID */
 	int file_id;        /**< the '%d' in HUGEFILE_FMT */
 	int memseg_id;      /**< the memory segment to which page belongs */
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-	int repeated;		/**< number of times the page size is repeated */
-#endif
 	char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
 };
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 857dc3ea..9e7d8f6b 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -47,7 +47,9 @@
 int rte_eal_memzone_init(void);
 
 /**
- * Common log initialization function (private to eal).
+ * Common log initialization function (private to eal).  Determines
+ * where log data is written when no call to rte_openlog_stream is
+ * in effect.
  *
  * @param default_log
  *   The default log stream to be used.
@@ -55,7 +57,7 @@ int rte_eal_memzone_init(void);
  *   - 0 on success
  *   - Negative on error
  */
-int rte_eal_common_log_init(FILE *default_log);
+void eal_log_set_default(FILE *default_log);
 
 /**
  * Fill configuration with number of physical and logical processors
@@ -97,16 +99,6 @@ int rte_eal_memory_init(void);
 int rte_eal_timer_init(void);
 
 /**
- * Init early logs
- *
- * This function is private to EAL.
- *
- * @return
- *   0 on success, negative on error
- */
-int rte_eal_log_early_init(void);
-
-/**
  * Init the default log stream
  *
  * This function is private to EAL.
@@ -117,7 +109,7 @@ int rte_eal_log_early_init(void);
 int rte_eal_log_init(const char *id, int facility);
 
 /**
- * Init the default log stream
+ * Init the PCI infrastructure
  *
  * This function is private to EAL.
  *
@@ -126,30 +118,21 @@ int rte_eal_log_init(const char *id, int facility);
  */
 int rte_eal_pci_init(void);
 
-#ifdef RTE_LIBRTE_IVSHMEM
-/**
- * Init the memory from IVSHMEM devices
- *
- * This function is private to EAL.
- *
- * @return
- *  0 on success, negative on error
- */
-int rte_eal_ivshmem_init(void);
+struct rte_pci_driver;
+struct rte_pci_device;
 
 /**
- * Init objects in IVSHMEM devices
+ * Update a pci device object by asking the kernel for the latest information.
  *
  * This function is private to EAL.
  *
+ * @param addr
+ *	The PCI Bus-Device-Function address to look for
  * @return
- *  0 on success, negative on error
+ *   - 0 on success.
+ *   - negative on error.
  */
-int rte_eal_ivshmem_obj_init(void);
-#endif
-
-struct rte_pci_driver;
-struct rte_pci_device;
+int pci_update_device(const struct rte_pci_addr *addr);
 
 /**
  * Unbind kernel driver for this device
@@ -259,13 +242,6 @@ int rte_eal_intr_init(void);
 int rte_eal_alarm_init(void);
 
 /**
- * This function initialises any virtual devices
- *
- * This function is private to the EAL.
- */
-int rte_eal_dev_init(void);
-
-/**
  * Function is to check if the kernel module(like, vfio, vfio_iommu_type1,
  * etc.) loaded.
  *
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
index 3f2dd1f2..1b312b30 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -41,6 +41,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+#include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
 /* fix missing __builtin_bswap16 for gcc older then 4.8 */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
index 14f26120..867a9468 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h
@@ -45,6 +45,11 @@ extern "C" {
  * @return
  *   The time base for this lcore.
  */
+#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
+/**
+ * This call is portable to any ARMv8 architecture, however, typically
+ * cntvct_el0 runs at <= 100MHz and it may be imprecise for some tasks.
+ */
 static inline uint64_t
 rte_rdtsc(void)
 {
@@ -53,6 +58,34 @@ rte_rdtsc(void)
 	asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
 	return tsc;
 }
+#else
+/**
+ * This is an alternative method to enable rte_rdtsc() with high resolution
+ * PMU cycles counter. The cycle counter runs at cpu frequency and this scheme
+ * uses ARMv8 PMU subsystem to get the cycle counter in userspace. However,
+ * access to PMU cycle counter from user space is not enabled by default in
+ * arm64 linux kernel.
+ * It is possible to enable cycle counter at user space access by configuring
+ * the PMU from the privileged mode (kernel space).
+ *
+ * asm volatile("msr pmintenset_el1, %0" : : "r" ((u64)(0 << 31)));
+ * asm volatile("msr pmcntenset_el0, %0" :: "r" BIT(31));
+ * asm volatile("msr pmuserenr_el0, %0" : : "r"(BIT(0) | BIT(2)));
+ * asm volatile("mrs %0, pmcr_el0" : "=r" (val));
+ * val |= (BIT(0) | BIT(2));
+ * isb();
+ * asm volatile("msr pmcr_el0, %0" : : "r" (val));
+ *
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+	uint64_t tsc;
+
+	asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc));
+	return tsc;
+}
+#endif
 
 static inline uint64_t
 rte_rdtsc_precise(void)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index da6c233a..c3a26192 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -148,7 +148,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 }
 
 #define rte_memcpy(dst, src, n)              \
-	({ (__builtin_constant_p(n)) ?       \
+	__extension__ ({                     \
+	(__builtin_constant_p(n)) ?          \
 	memcpy((dst), (src), (n)) :          \
 	rte_memcpy_func((dst), (src), (n)); })
 
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index 5aeed22d..43cde172 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -37,6 +37,7 @@
 extern "C" {
 #endif
 
+#include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
 static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index 3ed46a46..0d077ea6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -37,6 +37,7 @@
 extern "C" {
 #endif
 
+#include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
 static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index a33c0544..b86c2cf5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -33,6 +33,7 @@
 #ifndef _RTE_VECT_ARM_H_
 #define _RTE_VECT_ARM_H_
 
+#include <stdint.h>
 #include "arm_neon.h"
 
 #ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index 924e8940..fb4fccb4 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -46,6 +46,7 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
 #include "generic/rte_atomic.h"
 
 /**
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
index 3c1734ed..544de3c2 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h
@@ -42,6 +42,7 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
 #include "generic/rte_byteorder.h"
 
 /*
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
index 64beddf9..8fa6fc60 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h
@@ -40,6 +40,7 @@ extern "C" {
 #include "generic/rte_cycles.h"
 
 #include <rte_byteorder.h>
+#include <rte_common.h>
 
 /**
  * Read the time base register.
@@ -52,6 +53,7 @@ rte_rdtsc(void)
 {
 	union {
 		uint64_t tsc_64;
+		RTE_STD_C11
 		struct {
 #if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
 			uint32_t hi_32;
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
index acf7aac2..ca9d1dc5 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -95,7 +95,8 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 }
 
 #define rte_memcpy(dst, src, n)              \
-	({ (__builtin_constant_p(n)) ?       \
+	__extension__ ({                     \
+	(__builtin_constant_p(n)) ?          \
 	memcpy((dst), (src), (n)) :          \
 	rte_memcpy_func((dst), (src), (n)); })
 
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index 9a1995ea..fd2e53b9 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -37,6 +37,7 @@
 extern "C" {
 #endif
 
+#include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
 static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
index 3f67154b..05209e52 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h
@@ -1,8 +1,7 @@
-/*-
+/*
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
+ *   Copyright (C) IBM Corporation 2016.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -14,7 +13,7 @@
  *       notice, this list of conditions and the following disclaimer in
  *       the documentation and/or other materials provided with the
  *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
+ *     * Neither the name of IBM Corporation nor the names of its
  *       contributors may be used to endorse or promote products derived
  *       from this software without specific prior written permission.
  *
@@ -29,28 +28,33 @@
  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef _VIRTIO_NET_CDEV_H
-#define _VIRTIO_NET_CDEV_H
+*/
 
-#include <stdint.h>
-#include <linux/vhost.h>
+#ifndef _RTE_VECT_PPC_64_H_
+#define _RTE_VECT_PPC_64_H_
 
-#include "vhost-net.h"
+#include <altivec.h>
 
-/*
- * Structure used to identify device context.
- */
-struct vhost_cuse_device_ctx {
-	pid_t	pid;	/* PID of process calling the IOCTL. */
-	int	vid;	/* Virtio-net device ID */
-};
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-int
-cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
-	const struct vhost_memory *mem_regions_addr, uint32_t nregions);
+typedef vector signed int xmm_t;
 
-int
-cuse_set_backend(struct vhost_cuse_device_ctx ctx, struct vhost_vring_file *);
+#define	XMM_SIZE	(sizeof(xmm_t))
+#define	XMM_MASK	(XMM_SIZE - 1)
 
+typedef union rte_xmm {
+	xmm_t    x;
+	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+	double   pd[XMM_SIZE / sizeof(double)];
+} __attribute__((aligned(16))) rte_xmm_t;
+
+#ifdef __cplusplus
+}
 #endif
+
+#endif /* _RTE_VECT_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index b20056b8..00b1cdf5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+#include <rte_common.h>
 #include <emmintrin.h>
 #include "generic/rte_atomic.h"
 
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index 400d8a96..2e04c759 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -37,9 +37,17 @@
  * All rights reserved.
  */
 
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
 #ifndef _RTE_ATOMIC_I686_H_
 #define _RTE_ATOMIC_I686_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
 /*------------------------- 64 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
@@ -47,6 +55,7 @@ static inline int
 rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
 {
 	uint8_t res;
+	RTE_STD_C11
 	union {
 		struct {
 			uint32_t l32;
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 4de66000..1a53a766 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -37,9 +37,17 @@
  * All rights reserved.
  */
 
+#ifndef _RTE_ATOMIC_X86_H_
+#error do not include this file directly, use <rte_atomic.h> instead
+#endif
+
 #ifndef _RTE_ATOMIC_X86_64_H_
 #define _RTE_ATOMIC_X86_64_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
 /*------------------------- 64 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
index ffdb6ef5..251f11b4 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h
@@ -38,6 +38,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+#include <rte_common.h>
 #include "generic/rte_byteorder.h"
 
 #ifndef RTE_BYTE_ORDER
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
index 51c306f8..14d64834 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h
@@ -31,9 +31,16 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
 #ifndef _RTE_BYTEORDER_I686_H_
 #define _RTE_BYTEORDER_I686_H_
 
+#include <stdint.h>
+#include <rte_byteorder.h>
+
 /*
  * An architecture-optimized byte swap for a 64-bit value.
  *
diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
index dda572bd..516ac052 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h
@@ -31,9 +31,16 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_BYTEORDER_X86_H_
+#error do not include this file directly, use <rte_byteorder.h> instead
+#endif
+
 #ifndef _RTE_BYTEORDER_X86_64_H_
 #define _RTE_BYTEORDER_X86_64_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+
 /*
  * An architecture-optimized byte swap for a 64-bit value.
  *
diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
index 6e3c7d89..5eb6ce96 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h
@@ -75,12 +75,14 @@ extern "C" {
 extern int rte_cycles_vmware_tsc_map;
 #include <rte_branch_prediction.h>
 #endif
+#include <rte_common.h>
 
 static inline uint64_t
 rte_rdtsc(void)
 {
 	union {
 		uint64_t tsc_64;
+		RTE_STD_C11
 		struct {
 			uint32_t lo_32;
 			uint32_t hi_32;
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index 413035e7..b3bfc235 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -594,7 +594,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
  * - __m128i <xmm0> ~ <xmm8> must be pre-defined
  */
 #define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset)                                                     \
-({                                                                                                          \
+__extension__ ({                                                                                            \
     int tmp;                                                                                                \
     while (len >= 128 + 16 - offset) {                                                                      \
         xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16));                  \
@@ -655,7 +655,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
  * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined
  */
 #define MOVEUNALIGNED_LEFT47(dst, src, len, offset)                   \
-({                                                                    \
+__extension__ ({                                                      \
     switch (offset) {                                                 \
     case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break;    \
     case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break;    \
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 5dac47eb..f464398f 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -38,6 +38,7 @@
 extern "C" {
 #endif
 
+#include <rte_common.h>
 #include "generic/rte_prefetch.h"
 
 static inline void rte_prefetch0(const volatile void *p)
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
index 0649f794..ab099952 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -20,6 +20,7 @@
 /* Official RTM intrinsics interface matching gcc/icc, but works
    on older gcc compatible compilers and binutils. */
 
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index b698797c..77f2e253 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -40,6 +40,8 @@
  * RTE SSE/AVX related header.
  */
 
+#include <stdint.h>
+
 #if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))
 
 #ifdef __SSE__
@@ -106,7 +108,8 @@ typedef union rte_ymm {
 #endif /* __AVX__ */
 
 #ifdef RTE_ARCH_I686
-#define _mm_cvtsi128_si64(a) ({ \
+#define _mm_cvtsi128_si64(a)    \
+__extension__ ({                \
 	rte_xmm_t m;            \
 	m.x = (a);              \
 	(m.u64[0]);             \
@@ -117,7 +120,8 @@ typedef union rte_ymm {
  * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
  */
 #if (defined(__ICC) && __ICC < 1210)
-#define _mm_set_epi64x(a, b)  ({ \
+#define _mm_set_epi64x(a, b)     \
+__extension__ ({                 \
 	rte_xmm_t m;             \
 	m.u64[0] = b;            \
 	m.u64[1] = a;            \
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index bfb4fe44..43a704ec 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -42,6 +42,7 @@
  */
 
 #include <stdint.h>
+#include <rte_common.h>
 
 #ifdef __DOXYGEN__
 
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
index c46fdcf2..e00bccbc 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -50,6 +50,8 @@
 #include <endian.h>
 #endif
 
+#include <rte_common.h>
+
 /*
  * Compile-time endianness detection
  */
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
index c1da357c..71321f32 100644
--- a/lib/librte_eal/common/include/generic/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -44,6 +44,7 @@
 /**
  * Enumeration of all CPU features supported
  */
+__extension__
 enum rte_cpu_flag_t;
 
 /**
@@ -55,6 +56,7 @@ enum rte_cpu_flag_t;
  *     flag name
  *     NULL if flag ID is invalid
  */
+__extension__
 const char *
 rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
 
@@ -68,6 +70,7 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature);
  *     0 if flag is not available
  *     -ENOENT if flag is invalid
  */
+__extension__
 int
 rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature);
 
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
index 8cc21f20..00103ca9 100644
--- a/lib/librte_eal/common/include/generic/rte_cycles.h
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -180,15 +180,16 @@ rte_get_timer_hz(void)
 	default: rte_panic("Invalid timer source specified\n");
 	}
 }
-
 /**
  * Wait at least us microseconds.
+ * This function can be replaced with user-defined function.
+ * @see rte_delay_us_callback_register
  *
  * @param us
  *   The number of microseconds to wait.
  */
-void
-rte_delay_us(unsigned us);
+extern void
+(*rte_delay_us)(unsigned int us);
 
 /**
  * Wait at least ms milliseconds.
@@ -202,4 +203,21 @@ rte_delay_ms(unsigned ms)
 	rte_delay_us(ms * 1000);
 }
 
+/**
+ * Blocking delay function.
+ *
+ * @param us
+ *   Number of microseconds to wait.
+ */
+void rte_delay_us_block(unsigned int us);
+
+/**
+ * Replace rte_delay_us with user defined function.
+ *
+ * @param userfunc
+ *   User function which replaces rte_delay_us. Passing rte_delay_us_block
+ *   restores the built-in blocking delay function.
+ */
+void rte_delay_us_callback_register(void(*userfunc)(unsigned int));
+
 #endif /* _RTE_CYCLES_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h
index afb0afe4..4e9d8794 100644
--- a/lib/librte_eal/common/include/generic/rte_memcpy.h
+++ b/lib/librte_eal/common/include/generic/rte_memcpy.h
@@ -64,6 +64,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov32(uint8_t *dst, const uint8_t *src);
 
+#ifdef __DOXYGEN__
+
 /**
  * Copy 48 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -76,6 +78,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov48(uint8_t *dst, const uint8_t *src);
 
+#endif /* __DOXYGEN__ */
+
 /**
  * Copy 64 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 332f2a43..db5ac91c 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -59,6 +59,13 @@ extern "C" {
 #define asm __asm__
 #endif
 
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
 #ifdef RTE_ARCH_STRICT_ALIGN
 typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1)));
 typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1)));
@@ -268,7 +275,8 @@ rte_align64pow2(uint64_t v)
 /**
  * Macro to return the minimum of two numbers
  */
-#define RTE_MIN(a, b) ({ \
+#define RTE_MIN(a, b) \
+	__extension__ ({ \
 		typeof (a) _a = (a); \
 		typeof (b) _b = (b); \
 		_a < _b ? _a : _b; \
@@ -277,7 +285,8 @@ rte_align64pow2(uint64_t v)
 /**
  * Macro to return the maximum of two numbers
  */
-#define RTE_MAX(a, b) ({ \
+#define RTE_MAX(a, b) \
+	__extension__ ({ \
 		typeof (a) _a = (a); \
 		typeof (b) _b = (b); \
 		_a > _b ? _a : _b; \
@@ -326,6 +335,15 @@ rte_bsf32(uint32_t v)
 /** Take a macro value and get a string version of it */
 #define RTE_STR(x) _RTE_STR(x)
 
+/**
+ * ISO C helpers to modify format strings using variadic macros.
+ * This is a replacement for the ", ## __VA_ARGS__" GNU extension.
+ * An empty %s argument is appended to avoid a dangling comma.
+ */
+#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ ""
+#define RTE_FMT_HEAD(fmt, ...) fmt
+#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__
+
 /** Mask value of type "tp" for the first "ln" bit set. */
 #define	RTE_LEN2MASK(ln, tp)	\
 	((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 95789f9d..8840380d 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -100,37 +100,56 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
 	} \
 } while (0)
 
+/**
+ * A generic memory resource representation.
+ */
+struct rte_mem_resource {
+	uint64_t phys_addr; /**< Physical address, 0 if no resource. */
+	uint64_t len;       /**< Length of the resource. */
+	void *addr;         /**< Virtual address, NULL when not mapped. */
+};
 
 /** Double linked list of device drivers. */
 TAILQ_HEAD(rte_driver_list, rte_driver);
+/** Double linked list of devices. */
+TAILQ_HEAD(rte_device_list, rte_device);
+
+/* Forward declaration */
+struct rte_driver;
 
 /**
- * Initialization function called for each device driver once.
+ * A structure describing a generic device.
  */
-typedef int (rte_dev_init_t)(const char *name, const char *args);
+struct rte_device {
+	TAILQ_ENTRY(rte_device) next; /**< Next device */
+	struct rte_driver *driver;    /**< Associated driver */
+	int numa_node;                /**< NUMA node connection */
+	struct rte_devargs *devargs;  /**< Device user arguments */
+};
 
 /**
- * Uninitilization function called for each device driver once.
+ * Insert a device detected by a bus scan.
+ *
+ * @param dev
+ *   A pointer to a rte_device structure describing the detected device.
  */
-typedef int (rte_dev_uninit_t)(const char *name);
+void rte_eal_device_insert(struct rte_device *dev);
 
 /**
- * Driver type enumeration
+ * Remove a device (e.g. when being unplugged).
+ *
+ * @param dev
+ *   A pointer to a rte_device structure describing the device to be removed.
  */
-enum pmd_type {
-	PMD_VDEV = 0,
-	PMD_PDEV = 1,
-};
+void rte_eal_device_remove(struct rte_device *dev);
 
 /**
  * A structure describing a device driver.
  */
 struct rte_driver {
 	TAILQ_ENTRY(rte_driver) next;  /**< Next in list. */
-	enum pmd_type type;		   /**< PMD Driver type */
 	const char *name;                   /**< Driver name. */
-	rte_dev_init_t *init;              /**< Device init. function. */
-	rte_dev_uninit_t *uninit;          /**< Device uninit. function. */
+	const char *alias;              /**< Driver alias. */
 };
 
 /**
@@ -178,28 +197,45 @@ int rte_eal_vdev_init(const char *name, const char *args);
  */
 int rte_eal_vdev_uninit(const char *name);
 
-#define DRIVER_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+/**
+ * Attach a device to a registered driver.
+ *
+ * @param name
+ *   The device name, that refers to a pci device (or some private
+ *   way of designating a vdev device). Based on this device name, eal
+ *   will identify a driver capable of handling it and pass it to the
+ *   driver probing function.
+ * @param devargs
+ *   Device arguments to be passed to the driver.
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_eal_dev_attach(const char *name, const char *devargs);
 
-#define DRIVER_EXPORT_NAME(name, idx) \
-static const char DRIVER_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
-__attribute__((used)) = RTE_STR(name)
+/**
+ * Detach a device from its driver.
+ *
+ * @param name
+ *   Same description as for rte_eal_dev_attach().
+ *   Here, eal will call the driver detaching function.
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_eal_dev_detach(const char *name);
 
-#define PMD_REGISTER_DRIVER(drv, nm)\
-void devinitfn_ ##drv(void);\
-void __attribute__((constructor, used)) devinitfn_ ##drv(void)\
-{\
-	(drv).name = RTE_STR(nm);\
-	rte_eal_driver_register(&drv);\
-} \
-DRIVER_EXPORT_NAME(nm, __COUNTER__)
+#define RTE_PMD_EXPORT_NAME_ARRAY(n, idx) n##idx[]
+
+#define RTE_PMD_EXPORT_NAME(name, idx) \
+static const char RTE_PMD_EXPORT_NAME_ARRAY(this_pmd_name, idx) \
+__attribute__((used)) = RTE_STR(name)
 
 #define DRV_EXP_TAG(name, tag) __##name##_##tag
 
-#define DRIVER_REGISTER_PCI_TABLE(name, table) \
+#define RTE_PMD_REGISTER_PCI_TABLE(name, table) \
 static const char DRV_EXP_TAG(name, pci_tbl_export)[] __attribute__((used)) = \
 RTE_STR(table)
 
-#define DRIVER_REGISTER_PARAM_STRING(name, str) \
+#define RTE_PMD_REGISTER_PARAM_STRING(name, str) \
 static const char DRV_EXP_TAG(name, param_string_export)[] \
 __attribute__((used)) = str
 
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 53c59f56..88120a1c 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -76,6 +76,7 @@ struct rte_devargs {
 	TAILQ_ENTRY(rte_devargs) next;
 	/** Type of device. */
 	enum rte_devtype type;
+	RTE_STD_C11
 	union {
 		/** Used if type is RTE_DEVTYPE_*_PCI. */
 		struct {
@@ -106,8 +107,8 @@ extern struct rte_devargs_list devargs_list;
  * "04:00.0,arg=val".
  *
  * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1".
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1".
  *
  * The function parses the arguments string to get driver name and driver
  * arguments.
@@ -134,8 +135,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
  * "04:00.0,arg=val".
  *
  * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring",
- * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the
+ * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
+ * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the
  * driver name is not checked by this function, it is done when probing
  * the drivers.
  *
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index a71d6f57..d150b9dd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -44,6 +44,7 @@
 #include <sched.h>
 
 #include <rte_per_lcore.h>
+#include <rte_config.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -252,6 +253,9 @@ static inline int rte_gettid(void)
 	return RTE_PER_LCORE(_thread_id);
 }
 
+#define RTE_INIT(func) \
+static void __attribute__((constructor, used)) func(void)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h
index ff11ef3a..fd3c6eff 100644
--- a/lib/librte_eal/common/include/rte_interrupts.h
+++ b/lib/librte_eal/common/include/rte_interrupts.h
@@ -34,6 +34,8 @@
 #ifndef _RTE_INTERRUPTS_H_
 #define _RTE_INTERRUPTS_H_
 
+#include <rte_common.h>
+
 /**
  * @file
  *
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index b1add04c..29f7d192 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -42,8 +42,6 @@
  * This file provides a log API to RTE applications.
  */
 
-#include "rte_common.h" /* for __rte_deprecated macro */
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -56,7 +54,7 @@ extern "C" {
 struct rte_logs {
 	uint32_t type;  /**< Bitfield with enabled logs. */
 	uint32_t level; /**< Log level. */
-	FILE *file;     /**< Pointer to current FILE* for logs. */
+	FILE *file;     /**< Output file set by rte_openlog_stream, or NULL. */
 };
 
 /** Global log informations */
@@ -102,9 +100,6 @@ extern struct rte_logs rte_logs;
 #define RTE_LOG_INFO     7U  /**< Informational.                    */
 #define RTE_LOG_DEBUG    8U  /**< Debug-level messages.             */
 
-/** The default log stream. */
-extern FILE *eal_default_log_stream;
-
 /**
  * Change the stream that will be used by the logging system.
  *
@@ -181,45 +176,6 @@ int rte_log_cur_msg_loglevel(void);
 int rte_log_cur_msg_logtype(void);
 
 /**
- * @deprecated
- * Enable or disable the history (enabled by default)
- *
- * @param enable
- *   true to enable, or 0 to disable history.
- */
-__rte_deprecated
-void rte_log_set_history(int enable);
-
-/**
- * @deprecated
- * Dump the log history to a file
- *
- * @param f
- *   A pointer to a file for output
- */
-__rte_deprecated
-void rte_log_dump_history(FILE *f);
-
-/**
- * @deprecated
- * Add a log message to the history.
- *
- * This function can be called from a user-defined log stream. It adds
- * the given message in the history that can be dumped using
- * rte_log_dump_history().
- *
- * @param buf
- *   A data buffer containing the message to be saved in the history.
- * @param size
- *   The length of the data buffer.
- * @return
- *   - 0: Success.
- *   - (-ENOBUFS) if there is no room to store the message.
- */
-__rte_deprecated
-int rte_log_add_in_history(const char *buf, size_t size);
-
-/**
  * Generates a log message.
  *
  * The message will be sent in the stream defined by the previous call
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index 74bb78c7..008ce134 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -294,7 +294,7 @@ rte_malloc_get_socket_stats(int socket,
 /**
  * Dump statistics.
  *
- * Dump for the specified type to the console. If the type argument is
+ * Dump for the specified type to a file. If the type argument is
  * NULL, all memory types will be dumped.
  *
  * @param f
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 06611093..4aa5d1f7 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -44,6 +44,8 @@
 #include <stddef.h>
 #include <stdio.h>
 
+#include <rte_config.h>
+
 #ifdef RTE_EXEC_ENV_LINUXAPP
 #include <exec-env/rte_dom0_common.h>
 #endif
@@ -54,6 +56,7 @@ extern "C" {
 
 #include <rte_common.h>
 
+__extension__
 enum rte_page_sizes {
 	RTE_PGSIZE_4K    = 1ULL << 12,
 	RTE_PGSIZE_64K   = 1ULL << 16,
@@ -103,13 +106,11 @@ typedef uint64_t phys_addr_t; /**< Physical address definition. */
  */
 struct rte_memseg {
 	phys_addr_t phys_addr;      /**< Start physical address. */
+	RTE_STD_C11
 	union {
 		void *addr;         /**< Start virtual address. */
 		uint64_t addr_64;   /**< Makes sure addr is always 64 bits */
 	};
-#ifdef RTE_LIBRTE_IVSHMEM
-	phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
-#endif
 	size_t len;               /**< Length of the segment. */
 	uint64_t hugepage_sz;       /**< The pagesize of underlying memory */
 	int32_t socket_id;          /**< NUMA socket ID. */
@@ -161,7 +162,7 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
 const struct rte_memseg *rte_eal_get_physmem_layout(void);
 
 /**
- * Dump the physical memory layout to the console.
+ * Dump the physical memory layout to a file.
  *
  * @param f
  *   A pointer to a file for output
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index f69b5a87..1d0827f4 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -53,6 +53,7 @@
 
 #include <stdio.h>
 #include <rte_memory.h>
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -78,13 +79,11 @@ struct rte_memzone {
 	char name[RTE_MEMZONE_NAMESIZE];  /**< Name of the memory zone. */
 
 	phys_addr_t phys_addr;            /**< Start physical address. */
+	RTE_STD_C11
 	union {
 		void *addr;                   /**< Start virtual address. */
 		uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
 	};
-#ifdef RTE_LIBRTE_IVSHMEM
-	phys_addr_t ioremap_addr;         /**< Real physical address inside the VM */
-#endif
 	size_t len;                       /**< Length of the memzone. */
 
 	uint64_t hugepage_sz;             /**< The page size of underlying memory */
@@ -256,12 +255,10 @@ const struct rte_memzone *rte_memzone_reserve_bounded(const char *name,
 /**
  * Free a memzone.
  *
- * Note: an IVSHMEM zone cannot be freed.
- *
  * @param mz
  *   A pointer to the memzone
  * @return
- *  -EINVAL - invalid parameter, IVSHMEM memzone.
+ *  -EINVAL - invalid parameter.
  *  0 - success
  */
 int rte_memzone_free(const struct rte_memzone *mz);
@@ -280,7 +277,7 @@ int rte_memzone_free(const struct rte_memzone *mz);
 const struct rte_memzone *rte_memzone_lookup(const char *name);
 
 /**
- * Dump all reserved memzones to the console.
+ * Dump all reserved memzones to a file.
  *
  * @param f
  *   A pointer to a file for output
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index fa749626..9ce88472 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -82,7 +82,9 @@ extern "C" {
 #include <stdint.h>
 #include <inttypes.h>
 
+#include <rte_debug.h>
 #include <rte_interrupts.h>
+#include <rte_dev.h>
 
 TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */
 TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
@@ -95,6 +97,7 @@ const char *pci_get_sysfs_path(void);
 
 /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
 #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
+#define PCI_PRI_STR_SIZE sizeof("XXXX:XX:XX.X")
 
 /** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */
 #define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
@@ -105,15 +108,6 @@ const char *pci_get_sysfs_path(void);
 /** Nb. of values in PCI resource format. */
 #define PCI_RESOURCE_FMT_NVAL 3
 
-/**
- * A structure describing a PCI resource.
- */
-struct rte_pci_resource {
-	uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
-	uint64_t len;         /**< Length of the resource. */
-	void *addr;           /**< Virtual address, NULL when not mapped. */
-};
-
 /** Maximum number of PCI resources. */
 #define PCI_MAX_RESOURCE 6
 
@@ -155,14 +149,14 @@ enum rte_kernel_driver {
  */
 struct rte_pci_device {
 	TAILQ_ENTRY(rte_pci_device) next;       /**< Next probed PCI device. */
+	struct rte_device device;               /**< Inherit core device */
 	struct rte_pci_addr addr;               /**< PCI location. */
 	struct rte_pci_id id;                   /**< PCI ID. */
-	struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE];   /**< PCI Memory Resource */
+	struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
+						/**< PCI Memory Resource */
 	struct rte_intr_handle intr_handle;     /**< Interrupt handle */
 	struct rte_pci_driver *driver;          /**< Associated driver */
 	uint16_t max_vfs;                       /**< sriov enable if not zero */
-	int numa_node;                          /**< NUMA node connection */
-	struct rte_devargs *devargs;            /**< Device user arguments */
 	enum rte_kernel_driver kdrv;            /**< Kernel driver passthrough */
 };
 
@@ -193,21 +187,21 @@ struct rte_pci_driver;
 /**
  * Initialisation function for the driver called during PCI probing.
  */
-typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *);
+typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *);
 
 /**
  * Uninitialisation function for the driver called during hotplugging.
  */
-typedef int (pci_devuninit_t)(struct rte_pci_device *);
+typedef int (pci_remove_t)(struct rte_pci_device *);
 
 /**
  * A structure describing a PCI driver.
  */
 struct rte_pci_driver {
 	TAILQ_ENTRY(rte_pci_driver) next;       /**< Next in list. */
-	const char *name;                       /**< Driver name. */
-	pci_devinit_t *devinit;                 /**< Device init. function. */
-	pci_devuninit_t *devuninit;             /**< Device uninit function. */
+	struct rte_driver driver;               /**< Inherit core driver. */
+	pci_probe_t *probe;                     /**< Device Probe function. */
+	pci_remove_t *remove;                   /**< Device Remove function. */
 	const struct rte_pci_id *id_table;	/**< ID table, NULL terminated. */
 	uint32_t drv_flags;                     /**< Flags contolling handling of device. */
 };
@@ -308,6 +302,28 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
 }
 #undef GET_PCIADDR_FIELD
 
+/**
+ * Utility function to write a PCI device name. This device name can later be
+ * used to retrieve the corresponding rte_pci_addr using the eal_parse_pci_*
+ * BDF helpers.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address
+ * @param output
+ *	The output buffer string
+ * @param size
+ *	The output buffer size
+ */
+static inline void
+rte_eal_pci_device_name(const struct rte_pci_addr *addr,
+		    char *output, size_t size)
+{
+	RTE_VERIFY(size >= PCI_PRI_STR_SIZE);
+	RTE_VERIFY(snprintf(output, size, PCI_PRI_FMT,
+			    addr->domain, addr->bus,
+			    addr->devid, addr->function) >= 0);
+}
+
 /* Compare two PCI device addresses. */
 /**
  * Utility function to compare two PCI device addresses.
@@ -442,7 +458,7 @@ int rte_eal_pci_probe_one(const struct rte_pci_addr *addr);
  * Close the single PCI device.
  *
  * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the devuninit() function for registered driver that has a
+ * address, then call the remove() function for registered driver that has a
  * matching entry in its id_table for discovered device.
  *
  * @param addr
@@ -470,6 +486,16 @@ void rte_eal_pci_dump(FILE *f);
  */
 void rte_eal_pci_register(struct rte_pci_driver *driver);
 
+/** Helper for PCI device registration from driver (eth, crypto) instance */
+#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
+RTE_INIT(pciinitfn_ ##nm); \
+static void pciinitfn_ ##nm(void) \
+{\
+	(pci_drv).driver.name = RTE_STR(nm);\
+	rte_eal_pci_register(&pci_drv); \
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
 /**
  * Unregister a PCI driver.
  *
diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h
deleted file mode 100644
index 6ec8ae8c..00000000
--- a/lib/librte_eal/common/include/rte_pci_dev_ids.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- *   redistributing this file, you may do so under either license.
- *
- *   GPL LICENSE SUMMARY
- *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *   General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *   The full GNU General Public License is included in this distribution
- *   in the file called LICENSE.GPL.
- *
- *   Contact Information:
- *   Intel Corporation
- *
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGB
-#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IGBVF
-#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBE
-#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev)
-#endif
-
-#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF
-#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev)
-#endif
-
-#ifndef PCI_VENDOR_ID_INTEL
-/** Vendor ID used by Intel devices */
-#define PCI_VENDOR_ID_INTEL 0x8086
-#endif
-
-/******************** Physical IGB devices from e1000_hw.h ********************/
-
-#define E1000_DEV_ID_82576                      0x10C9
-#define E1000_DEV_ID_82576_FIBER                0x10E6
-#define E1000_DEV_ID_82576_SERDES               0x10E7
-#define E1000_DEV_ID_82576_QUAD_COPPER          0x10E8
-#define E1000_DEV_ID_82576_QUAD_COPPER_ET2      0x1526
-#define E1000_DEV_ID_82576_NS                   0x150A
-#define E1000_DEV_ID_82576_NS_SERDES            0x1518
-#define E1000_DEV_ID_82576_SERDES_QUAD          0x150D
-#define E1000_DEV_ID_82575EB_COPPER             0x10A7
-#define E1000_DEV_ID_82575EB_FIBER_SERDES       0x10A9
-#define E1000_DEV_ID_82575GB_QUAD_COPPER        0x10D6
-#define E1000_DEV_ID_82580_COPPER               0x150E
-#define E1000_DEV_ID_82580_FIBER                0x150F
-#define E1000_DEV_ID_82580_SERDES               0x1510
-#define E1000_DEV_ID_82580_SGMII                0x1511
-#define E1000_DEV_ID_82580_COPPER_DUAL          0x1516
-#define E1000_DEV_ID_82580_QUAD_FIBER           0x1527
-#define E1000_DEV_ID_I350_COPPER                0x1521
-#define E1000_DEV_ID_I350_FIBER                 0x1522
-#define E1000_DEV_ID_I350_SERDES                0x1523
-#define E1000_DEV_ID_I350_SGMII                 0x1524
-#define E1000_DEV_ID_I350_DA4                   0x1546
-#define E1000_DEV_ID_I210_COPPER                0x1533
-#define E1000_DEV_ID_I210_COPPER_OEM1           0x1534
-#define E1000_DEV_ID_I210_COPPER_IT             0x1535
-#define E1000_DEV_ID_I210_FIBER                 0x1536
-#define E1000_DEV_ID_I210_SERDES                0x1537
-#define E1000_DEV_ID_I210_SGMII                 0x1538
-#define E1000_DEV_ID_I210_COPPER_FLASHLESS      0x157B
-#define E1000_DEV_ID_I210_SERDES_FLASHLESS      0x157C
-#define E1000_DEV_ID_I211_COPPER                0x1539
-#define E1000_DEV_ID_I354_BACKPLANE_1GBPS       0x1F40
-#define E1000_DEV_ID_I354_SGMII                 0x1F41
-#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS     0x1F45
-#define E1000_DEV_ID_DH89XXCC_SGMII             0x0438
-#define E1000_DEV_ID_DH89XXCC_SERDES            0x043A
-#define E1000_DEV_ID_DH89XXCC_BACKPLANE         0x043C
-#define E1000_DEV_ID_DH89XXCC_SFP               0x0440
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER)
-
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP)
-
-/****************** Physical IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82598                      0x10B6
-#define IXGBE_DEV_ID_82598_BX                   0x1508
-#define IXGBE_DEV_ID_82598AF_DUAL_PORT          0x10C6
-#define IXGBE_DEV_ID_82598AF_SINGLE_PORT        0x10C7
-#define IXGBE_DEV_ID_82598AT                    0x10C8
-#define IXGBE_DEV_ID_82598AT2                   0x150B
-#define IXGBE_DEV_ID_82598EB_SFP_LOM            0x10DB
-#define IXGBE_DEV_ID_82598EB_CX4                0x10DD
-#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT        0x10EC
-#define IXGBE_DEV_ID_82598_DA_DUAL_PORT         0x10F1
-#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM      0x10E1
-#define IXGBE_DEV_ID_82598EB_XF_LR              0x10F4
-#define IXGBE_DEV_ID_82599_KX4                  0x10F7
-#define IXGBE_DEV_ID_82599_KX4_MEZZ             0x1514
-#define IXGBE_DEV_ID_82599_KR                   0x1517
-#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE      0x10F8
-#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ       0x000C
-#define IXGBE_DEV_ID_82599_CX4                  0x10F9
-#define IXGBE_DEV_ID_82599_SFP                  0x10FB
-#define IXGBE_SUBDEV_ID_82599_SFP               0x11A9
-#define IXGBE_SUBDEV_ID_82599_RNDC              0x1F72
-#define IXGBE_SUBDEV_ID_82599_560FLR            0x17D0
-#define IXGBE_SUBDEV_ID_82599_ECNA_DP           0x0470
-#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE       0x152A
-#define IXGBE_DEV_ID_82599_SFP_FCOE             0x1529
-#define IXGBE_DEV_ID_82599_SFP_EM               0x1507
-#define IXGBE_DEV_ID_82599_SFP_SF2              0x154D
-#define IXGBE_DEV_ID_82599_SFP_SF_QP            0x154A
-#define IXGBE_DEV_ID_82599_QSFP_SF_QP           0x1558
-#define IXGBE_DEV_ID_82599EN_SFP                0x1557
-#define IXGBE_DEV_ID_82599_XAUI_LOM             0x10FC
-#define IXGBE_DEV_ID_82599_T3_LOM               0x151C
-#define IXGBE_DEV_ID_82599_LS                   0x154F
-#define IXGBE_DEV_ID_X540T                      0x1528
-#define IXGBE_DEV_ID_X540T1                     0x1560
-#define IXGBE_DEV_ID_X550EM_X_SFP               0x15AC
-#define IXGBE_DEV_ID_X550EM_X_10G_T             0x15AD
-#define IXGBE_DEV_ID_X550EM_X_1G_T              0x15AE
-#define IXGBE_DEV_ID_X550T                      0x1563
-#define IXGBE_DEV_ID_X550T1                     0x15D1
-#define IXGBE_DEV_ID_X550EM_A_KR                0x15C2
-#define IXGBE_DEV_ID_X550EM_A_KR_L              0x15C3
-#define IXGBE_DEV_ID_X550EM_A_SFP_N             0x15C4
-#define IXGBE_DEV_ID_X550EM_A_SGMII             0x15C6
-#define IXGBE_DEV_ID_X550EM_A_SGMII_L           0x15C7
-#define IXGBE_DEV_ID_X550EM_A_10G_T             0x15C8
-#define IXGBE_DEV_ID_X550EM_A_QSFP              0x15CA
-#define IXGBE_DEV_ID_X550EM_A_QSFP_N            0x15CC
-#define IXGBE_DEV_ID_X550EM_A_SFP               0x15CE
-#define IXGBE_DEV_ID_X550EM_A_1G_T              0x15E4
-#define IXGBE_DEV_ID_X550EM_A_1G_T_L            0x15E5
-#define IXGBE_DEV_ID_X550EM_X_KX4               0x15AA
-#define IXGBE_DEV_ID_X550EM_X_KR                0x15AB
-
-#ifdef RTE_NIC_BYPASS
-#define IXGBE_DEV_ID_82599_BYPASS               0x155D
-#endif
-
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-	IXGBE_DEV_ID_82598AF_SINGLE_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-	IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-	IXGBE_DEV_ID_82599_COMBO_BACKPLANE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \
-	IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4)
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR)
-
-#ifdef RTE_NIC_BYPASS
-RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS)
-#endif
-
-/****************** Virtual IGB devices from e1000_hw.h ******************/
-
-#define E1000_DEV_ID_82576_VF                   0x10CA
-#define E1000_DEV_ID_82576_VF_HV                0x152D
-#define E1000_DEV_ID_I350_VF                    0x1520
-#define E1000_DEV_ID_I350_VF_HV                 0x152F
-
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF)
-RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV)
-
-/****************** Virtual IXGBE devices from ixgbe_type.h ******************/
-
-#define IXGBE_DEV_ID_82599_VF                   0x10ED
-#define IXGBE_DEV_ID_82599_VF_HV                0x152E
-#define IXGBE_DEV_ID_X540_VF                    0x1515
-#define IXGBE_DEV_ID_X540_VF_HV                 0x1530
-#define IXGBE_DEV_ID_X550_VF_HV                 0x1564
-#define IXGBE_DEV_ID_X550_VF                    0x1565
-#define IXGBE_DEV_ID_X550EM_A_VF                0x15C5
-#define IXGBE_DEV_ID_X550EM_A_VF_HV             0x15B4
-#define IXGBE_DEV_ID_X550EM_X_VF                0x15A8
-#define IXGBE_DEV_ID_X550EM_X_VF_HV             0x15A9
-
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF)
-RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV)
-
-/*
- * Undef all RTE_PCI_DEV_ID_DECL_* here.
- */
-#undef RTE_PCI_DEV_ID_DECL_IGB
-#undef RTE_PCI_DEV_ID_DECL_IGBVF
-#undef RTE_PCI_DEV_ID_DECL_IXGBE
-#undef RTE_PCI_DEV_ID_DECL_IXGBEVF
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index cc3c0f1d..3aae098a 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -107,7 +107,7 @@ struct rte_tailq_elem {
 	RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
 
 /**
- * Dump tail queues to the console.
+ * Dump tail queues to a file.
  *
  * @param f
  *   A pointer to a file for output
@@ -148,8 +148,8 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
 int rte_eal_tailq_register(struct rte_tailq_elem *t);
 
 #define EAL_REGISTER_TAILQ(t) \
-void tailqinitfn_ ##t(void); \
-void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t); \
+static void tailqinitfn_ ##t(void) \
 { \
 	if (rte_eal_tailq_register(&t) < 0) \
 		rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h
index 4b13b9c1..28c6274c 100644
--- a/lib/librte_eal/common/include/rte_time.h
+++ b/lib/librte_eal/common/include/rte_time.h
@@ -31,6 +31,12 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifndef _RTE_TIME_H_
+#define _RTE_TIME_H_
+
+#include <stdint.h>
+#include <time.h>
+
 #define NSEC_PER_SEC             1000000000L
 
 /**
@@ -120,3 +126,5 @@ rte_ns_to_timespec(uint64_t nsec)
 
 	return ts;
 }
+
+#endif /* _RTE_TIME_H_ */
diff --git a/lib/librte_eal/common/include/rte_vdev.h b/lib/librte_eal/common/include/rte_vdev.h
new file mode 100644
index 00000000..784e837d
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vdev.h
@@ -0,0 +1,102 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_VDEV_H
+#define RTE_VDEV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+#include <rte_dev.h>
+
+/** Doubly linked list of virtual device drivers. */
+TAILQ_HEAD(vdev_driver_list, rte_vdev_driver);
+
+/**
+ * Probe function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_probe_t)(const char *name, const char *args);
+
+/**
+ * Remove function called for each virtual device driver once.
+ */
+typedef int (rte_vdev_remove_t)(const char *name);
+
+/**
+ * A virtual device driver abstraction.
+ */
+struct rte_vdev_driver {
+	TAILQ_ENTRY(rte_vdev_driver) next; /**< Next in list. */
+	struct rte_driver driver;      /**< Inherited general driver. */
+	rte_vdev_probe_t *probe;       /**< Virtual device probe function. */
+	rte_vdev_remove_t *remove;     /**< Virtual device remove function. */
+};
+
+/**
+ * Register a virtual device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vdev_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vdrv_register(struct rte_vdev_driver *driver);
+
+/**
+ * Unregister a virtual device driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vdev_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vdrv_unregister(struct rte_vdev_driver *driver);
+
+#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
+RTE_INIT(vdrvinitfn_ ##vdrv);\
+static const char *vdrvinit_ ## nm ## _alias;\
+static void vdrvinitfn_ ##vdrv(void)\
+{\
+	(vdrv).driver.name = RTE_STR(nm);\
+	(vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
+	rte_eal_vdrv_register(&vdrv);\
+} \
+RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+#define RTE_PMD_REGISTER_ALIAS(nm, alias)\
+static const char *vdrvinit_ ## nm ## _alias = RTE_STR(alias)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 8187dc7b..da204e63 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -45,6 +45,7 @@ extern "C" {
 
 #include <stdint.h>
 #include <string.h>
+#include <stdio.h>
 #include <rte_common.h>
 
 /**
@@ -60,12 +61,12 @@ extern "C" {
 /**
  * Minor version/month number i.e. the mm in yy.mm.z
  */
-#define RTE_VER_MONTH 7
+#define RTE_VER_MONTH 11
 
 /**
  * Patch level number i.e. the z in yy.mm.z
  */
-#define RTE_VER_MINOR 2
+#define RTE_VER_MINOR 0
 
 /**
  * Extra string to be appended to version number
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 763fa324..267a4c6c 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -221,14 +221,6 @@ rte_eal_malloc_heap_init(void)
 	for (ms = &mcfg->memseg[0], ms_cnt = 0;
 			(ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
 			ms_cnt++, ms++) {
-#ifdef RTE_LIBRTE_IVSHMEM
-		/*
-		 * if segment has ioremap address set, it's an IVSHMEM segment and
-		 * it is not memory to allocate from.
-		 */
-		if (ms->ioremap_addr != 0)
-			continue;
-#endif
 		malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
 	}
 
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 182729c0..4e206f09 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -37,19 +37,13 @@ ARCH_DIR ?= $(RTE_ARCH)
 EXPORT_MAP := rte_eal_version.map
 VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
 
-LIBABIVER := 2
+LIBABIVER := 3
 
 VPATH += $(RTE_SDK)/lib/librte_eal/common
 
 CFLAGS += -I$(SRCDIR)/include
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
-ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
-# workaround for circular dependency eal -> ivshmem -> ring/mempool -> eal
-CFLAGS += -I$(RTE_SDK)/lib/librte_ring
-CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
-CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
-endif
 CFLAGS += $(WERROR_FLAGS) -O3
 
 LDLIBS += -ldl
@@ -76,9 +70,6 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c
-ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c
-endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c
@@ -86,6 +77,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_vdev.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 3fb2188f..2075282e 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -238,7 +239,8 @@ rte_eal_config_attach(void)
 	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
 			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
 	if (mem_config == MAP_FAILED)
-		rte_panic("Cannot mmap memory for rte_config\n");
+		rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+			  errno, strerror(errno));
 
 	rte_config.mem_config = mem_config;
 }
@@ -263,9 +265,17 @@ rte_eal_config_reattach(void)
 	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
 			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
 			mem_cfg_fd, 0);
+	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
+		if (mem_config != MAP_FAILED)
+			/* errno is stale, don't use */
+			rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
+				  " - please use '--base-virtaddr' option\n",
+				  rte_mem_cfg_addr, mem_config);
+		else
+			rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
+				  errno, strerror(errno));
+	}
 	close(mem_cfg_fd);
-	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
-		rte_panic("Cannot mmap memory for rte_config\n");
 
 	rte_config.mem_config = mem_config;
 }
@@ -740,6 +750,9 @@ rte_eal_init(int argc, char **argv)
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
 
+	/* checks if the machine is adequate */
+	rte_cpu_check_supported();
+
 	if (!rte_atomic32_test_and_set(&run_once))
 		return -1;
 
@@ -748,9 +761,6 @@ rte_eal_init(int argc, char **argv)
 
 	thread_id = pthread_self();
 
-	if (rte_eal_log_early_init() < 0)
-		rte_panic("Cannot init early logs\n");
-
 	eal_log_level_parse(argc, argv);
 
 	/* set log level as early as possible */
@@ -789,6 +799,9 @@ rte_eal_init(int argc, char **argv)
 
 	rte_config_init();
 
+	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
+		rte_panic("Cannot init logs\n");
+
 	if (rte_eal_pci_init() < 0)
 		rte_panic("Cannot init PCI\n");
 
@@ -797,11 +810,6 @@ rte_eal_init(int argc, char **argv)
 		rte_panic("Cannot init VFIO\n");
 #endif
 
-#ifdef RTE_LIBRTE_IVSHMEM
-	if (rte_eal_ivshmem_init() < 0)
-		rte_panic("Cannot init IVSHMEM\n");
-#endif
-
 	if (rte_eal_memory_init() < 0)
 		rte_panic("Cannot init memory\n");
 
@@ -814,14 +822,6 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_tailqs_init() < 0)
 		rte_panic("Cannot init tail queues for objects\n");
 
-#ifdef RTE_LIBRTE_IVSHMEM
-	if (rte_eal_ivshmem_obj_init() < 0)
-		rte_panic("Cannot init IVSHMEM objects\n");
-#endif
-
-	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
-		rte_panic("Cannot init logs\n");
-
 	if (rte_eal_alarm_init() < 0)
 		rte_panic("Cannot init interrupt-handling thread\n");
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
deleted file mode 100644
index 67b3caf2..00000000
--- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
+++ /dev/null
@@ -1,954 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
-
-#include <stdint.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <sys/mman.h>
-#include <sys/file.h>
-#include <string.h>
-#include <sys/queue.h>
-
-#include <rte_log.h>
-#include <rte_pci.h>
-#include <rte_memory.h>
-#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
-#include <rte_string_fns.h>
-#include <rte_errno.h>
-#include <rte_ring.h>
-#include <rte_malloc.h>
-#include <rte_common.h>
-#include <rte_ivshmem.h>
-
-#include "eal_internal_cfg.h"
-#include "eal_private.h"
-
-#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
-#define PCI_DEVICE_ID_IVSHMEM 0x1110
-
-#define IVSHMEM_MAGIC 0x0BADC0DE
-
-#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
-#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
-
-#define PHYS 0x1
-#define VIRT 0x2
-#define IOREMAP 0x4
-#define FULL (PHYS|VIRT|IOREMAP)
-
-#define METADATA_SIZE_ALIGNED \
-	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
-
-#define CONTAINS(x,y)\
-	(((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
-
-#define DIM(x) (sizeof(x)/sizeof(x[0]))
-
-struct ivshmem_pci_device {
-	char path[PATH_MAX];
-	phys_addr_t ioremap_addr;
-};
-
-/* data type to store in config */
-struct ivshmem_segment {
-	struct rte_ivshmem_metadata_entry entry;
-	uint64_t align;
-	char path[PATH_MAX];
-};
-struct ivshmem_shared_config {
-	struct ivshmem_segment segment[RTE_MAX_MEMSEG];
-	uint32_t segment_idx;
-	struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
-	uint32_t pci_devs_idx;
-};
-static struct ivshmem_shared_config * ivshmem_config;
-static int memseg_idx;
-static int pagesz;
-
-/* Tailq heads to add rings to */
-TAILQ_HEAD(rte_ring_list, rte_tailq_entry);
-
-/*
- * Utility functions
- */
-
-static int
-is_ivshmem_device(struct rte_pci_device * dev)
-{
-	return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
-			&& dev->id.device_id == PCI_DEVICE_ID_IVSHMEM;
-}
-
-static void *
-map_metadata(int fd, uint64_t len)
-{
-	size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
-	size_t aligned_len = METADATA_SIZE_ALIGNED;
-
-	return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
-			MAP_SHARED, fd, len - aligned_len);
-}
-
-static void
-unmap_metadata(void * ptr)
-{
-	munmap(ptr, sizeof(struct rte_ivshmem_metadata));
-}
-
-static int
-has_ivshmem_metadata(int fd, uint64_t len)
-{
-	struct rte_ivshmem_metadata metadata;
-	void * ptr;
-
-	ptr = map_metadata(fd, len);
-
-	if (ptr == MAP_FAILED)
-		return -1;
-
-	metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
-	unmap_metadata(ptr);
-
-	return metadata.magic_number == IVSHMEM_MAGIC;
-}
-
-static void
-remove_segment(struct ivshmem_segment * ms, int len, int idx)
-{
-	int i;
-
-	for (i = idx; i < len - 1; i++)
-		memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
-	memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
-}
-
-static int
-overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
-	uint64_t start1, end1, start2, end2;
-	uint64_t p_start1, p_end1, p_start2, p_end2;
-	uint64_t i_start1, i_end1, i_start2, i_end2;
-	int result = 0;
-
-	/* gather virtual addresses */
-	start1 = mz1->addr_64;
-	end1 = mz1->addr_64 + mz1->len;
-	start2 = mz2->addr_64;
-	end2 = mz2->addr_64 + mz2->len;
-
-	/* gather physical addresses */
-	p_start1 = mz1->phys_addr;
-	p_end1 = mz1->phys_addr + mz1->len;
-	p_start2 = mz2->phys_addr;
-	p_end2 = mz2->phys_addr + mz2->len;
-
-	/* gather ioremap addresses */
-	i_start1 = mz1->ioremap_addr;
-	i_end1 = mz1->ioremap_addr + mz1->len;
-	i_start2 = mz2->ioremap_addr;
-	i_end2 = mz2->ioremap_addr + mz2->len;
-
-	/* check for overlap in virtual addresses */
-	if (start1 >= start2 && start1 < end2)
-		result |= VIRT;
-	if (start2 >= start1 && start2 < end1)
-		result |= VIRT;
-
-	/* check for overlap in physical addresses */
-	if (p_start1 >= p_start2 && p_start1 < p_end2)
-		result |= PHYS;
-	if (p_start2 >= p_start1 && p_start2 < p_end1)
-		result |= PHYS;
-
-	/* check for overlap in ioremap addresses */
-	if (i_start1 >= i_start2 && i_start1 < i_end2)
-		result |= IOREMAP;
-	if (i_start2 >= i_start1 && i_start2 < i_end1)
-		result |= IOREMAP;
-
-	return result;
-}
-
-static int
-adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
-{
-	uint64_t start1, end1, start2, end2;
-	uint64_t p_start1, p_end1, p_start2, p_end2;
-	uint64_t i_start1, i_end1, i_start2, i_end2;
-	int result = 0;
-
-	/* gather virtual addresses */
-	start1 = mz1->addr_64;
-	end1 = mz1->addr_64 + mz1->len;
-	start2 = mz2->addr_64;
-	end2 = mz2->addr_64 + mz2->len;
-
-	/* gather physical addresses */
-	p_start1 = mz1->phys_addr;
-	p_end1 = mz1->phys_addr + mz1->len;
-	p_start2 = mz2->phys_addr;
-	p_end2 = mz2->phys_addr + mz2->len;
-
-	/* gather ioremap addresses */
-	i_start1 = mz1->ioremap_addr;
-	i_end1 = mz1->ioremap_addr + mz1->len;
-	i_start2 = mz2->ioremap_addr;
-	i_end2 = mz2->ioremap_addr + mz2->len;
-
-	/* check if segments are virtually adjacent */
-	if (start1 == end2)
-		result |= VIRT;
-	if (start2 == end1)
-		result |= VIRT;
-
-	/* check if segments are physically adjacent */
-	if (p_start1 == p_end2)
-		result |= PHYS;
-	if (p_start2 == p_end1)
-		result |= PHYS;
-
-	/* check if segments are ioremap-adjacent */
-	if (i_start1 == i_end2)
-		result |= IOREMAP;
-	if (i_start2 == i_end1)
-		result |= IOREMAP;
-
-	return result;
-}
-
-static int
-has_adjacent_segments(struct ivshmem_segment * ms, int len)
-{
-	int i, j;
-
-	for (i = 0; i < len; i++)
-		for (j = i + 1; j < len; j++) {
-			/* we're only interested in fully adjacent segments; partially
-			 * adjacent segments can coexist.
-			 */
-			if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL)
-				return 1;
-		}
-	return 0;
-}
-
-static int
-has_overlapping_segments(struct ivshmem_segment * ms, int len)
-{
-	int i, j;
-
-	for (i = 0; i < len; i++)
-		for (j = i + 1; j < len; j++)
-			if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
-				return 1;
-	return 0;
-}
-
-static int
-seg_compare(const void * a, const void * b)
-{
-	const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
-	const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
-
-	/* move unallocated zones to the end */
-	if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
-		return 0;
-	if (s1->entry.mz.addr == 0)
-		return 1;
-	if (s2->entry.mz.addr == 0)
-		return -1;
-
-	return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
-}
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-static void
-entry_dump(struct rte_ivshmem_metadata_entry *e)
-{
-	RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
-			RTE_PTR_ADD(e->mz.addr, e->mz.len));
-	RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
-			e->mz.phys_addr,
-			e->mz.phys_addr + e->mz.len);
-	RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
-			e->mz.ioremap_addr,
-			e->mz.ioremap_addr + e->mz.len);
-	RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
-	RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
-}
-#endif
-
-
-
-/*
- * Actual useful code
- */
-
-/* read through metadata mapped from the IVSHMEM device */
-static int
-read_metadata(char * path, int path_len, int fd, uint64_t flen)
-{
-	struct rte_ivshmem_metadata metadata;
-	struct rte_ivshmem_metadata_entry * entry;
-	int idx, i;
-	void * ptr;
-
-	ptr = map_metadata(fd, flen);
-
-	if (ptr == MAP_FAILED)
-		return -1;
-
-	metadata = *(struct rte_ivshmem_metadata*) (ptr);
-
-	unmap_metadata(ptr);
-
-	RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
-
-	idx = ivshmem_config->segment_idx;
-
-	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
-		idx <= RTE_MAX_MEMSEG; i++) {
-
-		if (idx == RTE_MAX_MEMSEG) {
-			RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
-			return -1;
-		}
-
-		entry = &metadata.entry[i];
-
-		/* stop on uninitialized memzone */
-		if (entry->mz.len == 0)
-			break;
-
-		/* copy metadata entry */
-		memcpy(&ivshmem_config->segment[idx].entry, entry,
-				sizeof(struct rte_ivshmem_metadata_entry));
-
-		/* copy path */
-		snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
-
-		idx++;
-	}
-	ivshmem_config->segment_idx = idx;
-
-	return 0;
-}
-
-/* check through each segment and look for adjacent or overlapping ones. */
-static int
-cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
-{
-	struct ivshmem_segment * s, * tmp;
-	int i, j, concat, seg_adjacent, seg_overlapping;
-	uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
-
-	qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
-				seg_compare);
-
-	while (has_overlapping_segments(ms, tbl_len) ||
-			has_adjacent_segments(ms, tbl_len)) {
-
-		for (i = 0; i < tbl_len; i++) {
-			s = &ms[i];
-
-			concat = 0;
-
-			for (j = i + 1; j < tbl_len; j++) {
-				tmp = &ms[j];
-
-				/* check if this segment is overlapping with existing segment,
-				 * or is adjacent to existing segment */
-				seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
-				seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
-
-				/* check if segments fully overlap or are fully adjacent */
-				if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-					RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
-					RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
-					entry_dump(&s->entry);
-					RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
-					entry_dump(&tmp->entry);
-#endif
-
-					start1 = s->entry.mz.addr_64;
-					start2 = tmp->entry.mz.addr_64;
-					p_start1 = s->entry.mz.phys_addr;
-					p_start2 = tmp->entry.mz.phys_addr;
-					i_start1 = s->entry.mz.ioremap_addr;
-					i_start2 = tmp->entry.mz.ioremap_addr;
-					end1 = s->entry.mz.addr_64 + s->entry.mz.len;
-					end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
-
-					/* settle for minimum start address and maximum length */
-					s->entry.mz.addr_64 = RTE_MIN(start1, start2);
-					s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
-					s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
-					s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
-					s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
-					concat = 1;
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-					RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
-					entry_dump(&s->entry);
-
-#endif
-				}
-				/* if segments not fully overlap, we have an error condition.
-				 * adjacent segments can coexist.
-				 */
-				else if (seg_overlapping > 0) {
-					RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-					RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
-					entry_dump(&s->entry);
-					RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
-					entry_dump(&tmp->entry);
-#endif
-					return -1;
-				}
-				if (concat)
-					break;
-			}
-			/* if we concatenated, remove segment at j */
-			if (concat) {
-				remove_segment(ms, tbl_len, j);
-				tbl_len--;
-				break;
-			}
-		}
-	}
-
-	return tbl_len;
-}
-
-static int
-create_shared_config(void)
-{
-	char path[PATH_MAX];
-	int fd;
-
-	/* build ivshmem config file path */
-	snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
-			internal_config.hugefile_prefix);
-
-	fd = open(path, O_CREAT | O_RDWR, 0600);
-
-	if (fd < 0) {
-		RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
-		return -1;
-	}
-
-	/* try ex-locking first - if the file is locked, we have a problem */
-	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
-		RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
-		close(fd);
-		return -1;
-	}
-
-	if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) {
-		RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno));
-		return -1;
-	}
-
-	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
-			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-	if (ivshmem_config == MAP_FAILED)
-		return -1;
-
-	memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
-
-	/* change the exclusive lock we got earlier to a shared lock */
-	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
-		return -1;
-	}
-
-	close(fd);
-
-	return 0;
-}
-
-/* open shared config file and, if present, map the config.
- * having no config file is not an error condition, as we later check if
- * ivshmem_config is NULL (if it is, that means nothing was mapped). */
-static int
-open_shared_config(void)
-{
-	char path[PATH_MAX];
-	int fd;
-
-	/* build ivshmem config file path */
-	snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
-			internal_config.hugefile_prefix);
-
-	fd = open(path, O_RDONLY);
-
-	/* if the file doesn't exist, just return success */
-	if (fd < 0 && errno == ENOENT)
-		return 0;
-	/* else we have an error condition */
-	else if (fd < 0) {
-		RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
-				path, strerror(errno));
-		return -1;
-	}
-
-	/* try ex-locking first - if the lock *does* succeed, this means it's a
-	 * stray config file, so it should be deleted.
-	 */
-	if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
-
-		/* if we can't remove the file, something is wrong */
-		if (unlink(path) < 0) {
-			RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
-					strerror(errno));
-			return -1;
-		}
-
-		/* release the lock */
-		flock(fd, LOCK_UN);
-		close(fd);
-
-		/* return success as having a stray config file is equivalent to not
-		 * having config file at all.
-		 */
-		return 0;
-	}
-
-	ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
-			PROT_READ, MAP_SHARED, fd, 0);
-
-	if (ivshmem_config == MAP_FAILED)
-		return -1;
-
-	/* place a shared lock on config file */
-	if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-		RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
-		return -1;
-	}
-
-	close(fd);
-
-	return 0;
-}
-
-/*
- * This function does the following:
- *
- * 1) Builds a table of ivshmem_segments with proper offset alignment
- * 2) Cleans up that table so that we don't have any overlapping or adjacent
- *    memory segments
- * 3) Creates memsegs from this table and maps them into memory.
- */
-static inline int
-map_all_segments(void)
-{
-	struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
-	struct ivshmem_pci_device * pci_dev;
-	struct rte_mem_config * mcfg;
-	struct ivshmem_segment * seg;
-	int fd, fd_zero;
-	unsigned i, j;
-	struct rte_memzone mz;
-	struct rte_memseg ms;
-	void * base_addr;
-	uint64_t align, len;
-	phys_addr_t ioremap_addr;
-
-	ioremap_addr = 0;
-
-	memset(ms_tbl, 0, sizeof(ms_tbl));
-	memset(&mz, 0, sizeof(struct rte_memzone));
-	memset(&ms, 0, sizeof(struct rte_memseg));
-
-	/* first, build a table of memsegs to map, to avoid failed mmaps due to
-	 * overlaps
-	 */
-	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
-		if (i == RTE_MAX_MEMSEG) {
-			RTE_LOG(ERR, EAL, "Too many segments requested!\n");
-			return -1;
-		}
-
-		seg = &ivshmem_config->segment[i];
-
-		/* copy segment to table */
-		memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
-
-		/* find ioremap addr */
-		for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
-			pci_dev = &ivshmem_config->pci_devs[j];
-			if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
-				ioremap_addr = pci_dev->ioremap_addr;
-				break;
-			}
-		}
-		if (ioremap_addr == 0) {
-			RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
-			return -1;
-		}
-
-		/* work out alignments */
-		align = seg->entry.mz.addr_64 -
-				RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
-		len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
-
-		/* save original alignments */
-		ms_tbl[i].align = align;
-
-		/* create a memory zone */
-		mz.addr_64 = seg->entry.mz.addr_64 - align;
-		mz.len = len;
-		mz.hugepage_sz = seg->entry.mz.hugepage_sz;
-		mz.phys_addr = seg->entry.mz.phys_addr - align;
-
-		/* find true physical address */
-		mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
-
-		ms_tbl[i].entry.offset = seg->entry.offset - align;
-
-		memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
-	}
-
-	/* clean up the segments */
-	memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
-
-	if (memseg_idx < 0)
-		return -1;
-
-	mcfg = rte_eal_get_configuration()->mem_config;
-
-	fd_zero = open("/dev/zero", O_RDWR);
-
-	if (fd_zero < 0) {
-		RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
-		return -1;
-	}
-
-	/* create memsegs and put them into DPDK memory */
-	for (i = 0; i < (unsigned) memseg_idx; i++) {
-
-		seg = &ms_tbl[i];
-
-		ms.addr_64 = seg->entry.mz.addr_64;
-		ms.hugepage_sz = seg->entry.mz.hugepage_sz;
-		ms.len = seg->entry.mz.len;
-		ms.nchannel = rte_memory_get_nchannel();
-		ms.nrank = rte_memory_get_nrank();
-		ms.phys_addr = seg->entry.mz.phys_addr;
-		ms.ioremap_addr = seg->entry.mz.ioremap_addr;
-		ms.socket_id = seg->entry.mz.socket_id;
-
-		base_addr = mmap(ms.addr, ms.len,
-				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
-
-		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
-			RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
-			return -1;
-		}
-
-		fd = open(seg->path, O_RDWR);
-
-		if (fd < 0) {
-			RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
-					strerror(errno));
-			return -1;
-		}
-
-		munmap(ms.addr, ms.len);
-
-		base_addr = mmap(ms.addr, ms.len,
-				PROT_READ | PROT_WRITE, MAP_SHARED, fd,
-				seg->entry.offset);
-
-
-		if (base_addr == MAP_FAILED || base_addr != ms.addr) {
-			RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
-					"expected %p got %p (%s)\n", ms.addr, base_addr,
-					strerror(errno));
-			return -1;
-		}
-
-		RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
-				"offset 0x%" PRIx64 "\n",
-				ms.addr, ms.len, seg->entry.offset);
-
-		/* put the pointers back into their real positions using original
-		 * alignment */
-		ms.addr_64 += seg->align;
-		ms.phys_addr += seg->align;
-		ms.ioremap_addr += seg->align;
-		ms.len -= seg->align;
-
-		/* at this point, the rest of DPDK memory is not initialized, so we
-		 * expect memsegs to be empty */
-		memcpy(&mcfg->memseg[i], &ms,
-				sizeof(struct rte_memseg));
-
-		close(fd);
-
-		RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n",
-				ms.len);
-	}
-
-	return 0;
-}
-
-/* this happens at a later stage, after general EAL memory initialization */
-int
-rte_eal_ivshmem_obj_init(void)
-{
-	struct rte_ring_list* ring_list = NULL;
-	struct rte_mem_config * mcfg;
-	struct ivshmem_segment * seg;
-	struct rte_memzone * mz;
-	struct rte_ring * r;
-	struct rte_tailq_entry *te;
-	unsigned i, ms, idx;
-	uint64_t offset;
-
-	/* secondary process would not need any object discovery - it'll all
-	 * already be in shared config */
-	if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
-		return 0;
-
-	/* check that we have an initialised ring tail queue */
-	ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list);
-	if (ring_list == NULL) {
-		RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
-		return -1;
-	}
-
-	mcfg = rte_eal_get_configuration()->mem_config;
-
-	/* create memzones */
-	for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) {
-
-		seg = &ivshmem_config->segment[i];
-
-		/* add memzone */
-		if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) {
-			RTE_LOG(ERR, EAL, "No more memory zones available!\n");
-			return -1;
-		}
-
-		idx = mcfg->memzone_cnt;
-
-		RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
-				seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
-
-		memcpy(&mcfg->memzone[idx], &seg->entry.mz,
-				sizeof(struct rte_memzone));
-
-		/* find ioremap address */
-		for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
-			if (ms == RTE_MAX_MEMSEG) {
-				RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
-				return -1;
-			}
-			if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
-				offset = mcfg->memzone[idx].addr_64 -
-								mcfg->memseg[ms].addr_64;
-				mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
-						offset;
-				break;
-			}
-		}
-
-		mcfg->memzone_cnt++;
-	}
-
-	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
-
-	/* find rings */
-	for (i = 0; i < mcfg->memzone_cnt; i++) {
-		mz = &mcfg->memzone[i];
-
-		/* check if memzone has a ring prefix */
-		if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
-				sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
-			continue;
-
-		r = (struct rte_ring*) (mz->addr_64);
-
-		te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0);
-		if (te == NULL) {
-			RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n");
-			return -1;
-		}
-
-		te->data = (void *) r;
-
-		TAILQ_INSERT_TAIL(ring_list, te, next);
-
-		RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
-	}
-	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-	rte_memzone_dump(stdout);
-	rte_ring_list_dump(stdout);
-#endif
-
-	return 0;
-}
-
-/* initialize ivshmem structures */
-int rte_eal_ivshmem_init(void)
-{
-	struct rte_pci_device * dev;
-	struct rte_pci_resource * res;
-	int fd, ret;
-	char path[PATH_MAX];
-
-	/* initialize everything to 0 */
-	memset(path, 0, sizeof(path));
-	ivshmem_config = NULL;
-
-	pagesz = getpagesize();
-
-	RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
-
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-
-		if (open_shared_config() < 0) {
-			RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
-			return -1;
-		}
-	}
-	else {
-
-		TAILQ_FOREACH(dev, &pci_device_list, next) {
-
-			if (is_ivshmem_device(dev)) {
-
-				/* IVSHMEM memory is always on BAR2 */
-				res = &dev->mem_resource[2];
-
-				/* if we don't have a BAR2 */
-				if (res->len == 0)
-					continue;
-
-				/* construct pci device path */
-				snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
-						dev->addr.domain, dev->addr.bus, dev->addr.devid,
-						dev->addr.function);
-
-				/* try to find memseg */
-				fd = open(path, O_RDWR);
-				if (fd < 0) {
-					RTE_LOG(ERR, EAL, "Could not open %s\n", path);
-					return -1;
-				}
-
-				/* check if it's a DPDK IVSHMEM device */
-				ret = has_ivshmem_metadata(fd, res->len);
-
-				/* is DPDK device */
-				if (ret == 1) {
-
-					/* config file creation is deferred until the first
-					 * DPDK device is found. then, it has to be created
-					 * only once. */
-					if (ivshmem_config == NULL &&
-							create_shared_config() < 0) {
-						RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
-						close(fd);
-						return -1;
-					}
-
-					if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
-						RTE_LOG(ERR, EAL, "Could not read metadata from"
-								" device %02x:%02x.%x!\n", dev->addr.bus,
-								dev->addr.devid, dev->addr.function);
-						close(fd);
-						return -1;
-					}
-
-					if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
-						RTE_LOG(WARNING, EAL,
-								"IVSHMEM PCI device limit exceeded. Increase "
-								"CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS  in "
-								"your config file.\n");
-						break;
-					}
-
-					RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
-							dev->addr.bus, dev->addr.devid, dev->addr.function);
-
-					ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
-					snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
-							sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
-							"%s", path);
-
-					ivshmem_config->pci_devs_idx++;
-				}
-				/* failed to read */
-				else if (ret < 0) {
-					RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
-							strerror(errno));
-					close(fd);
-					return -1;
-				}
-				/* not a DPDK device */
-				else
-					RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
-
-				/* close the BAR fd */
-				close(fd);
-			}
-		}
-	}
-
-	/* ivshmem_config is not NULL only if config was created and/or mapped */
-	if (ivshmem_config) {
-		if (map_all_segments() < 0) {
-			RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
-			return -1;
-		}
-	}
-	else {
-		RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n");
-	}
-
-	return 0;
-}
-
-#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index d3911004..e3a50aa3 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -97,45 +97,7 @@ rte_eal_log_init(const char *id, int facility)
 
 	openlog(id, LOG_NDELAY | LOG_PID, facility);
 
-	if (rte_eal_common_log_init(log_stream) < 0)
-		return -1;
-
-	return 0;
-}
-
-/* early logs */
-
-/*
- * early log function, used before rte_eal_log_init
- */
-static ssize_t
-early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
-{
-	ssize_t ret;
-	ret = fwrite(buf, size, 1, stdout);
-	fflush(stdout);
-	if (ret == 0)
-		return -1;
-	return ret;
-}
-
-static cookie_io_functions_t early_log_func = {
-	.write = early_log_write,
-};
-static FILE *early_log_stream;
+	eal_log_set_default(log_stream);
 
-/*
- * init the log library, called by rte_eal_init() to enable early
- * logs
- */
-int
-rte_eal_log_early_init(void)
-{
-	early_log_stream = fopencookie(NULL, "w+", early_log_func);
-	if (early_log_stream == NULL) {
-		printf("Cannot configure early_log_stream\n");
-		return -1;
-	}
-	rte_openlog_stream(early_log_stream);
 	return 0;
 }
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index c04cff0c..a956bb22 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -376,25 +376,15 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 	void *vma_addr = NULL;
 	size_t vma_len = 0;
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-	RTE_SET_USED(vma_len);
-#endif
-
 	for (i = 0; i < hpi->num_pages[0]; i++) {
 		uint64_t hugepage_sz = hpi->hugepage_sz;
 
 		if (orig) {
 			hugepg_tbl[i].file_id = i;
 			hugepg_tbl[i].size = hugepage_sz;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-			eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
-					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
-					hugepg_tbl[i].file_id);
-#else
 			eal_get_hugefile_path(hugepg_tbl[i].filepath,
 					sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
 					hugepg_tbl[i].file_id);
-#endif
 			hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
 		}
 #ifndef RTE_ARCH_64
@@ -408,8 +398,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 			continue;
 		}
 #endif
-
-#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
 		else if (vma_len == 0) {
 			unsigned j, num_pages;
 
@@ -439,10 +427,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 			if (vma_addr == NULL)
 				vma_len = hugepage_sz;
 		}
-#endif
 
 		/* try to create hugepage file */
-		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
+		fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0600);
 		if (fd < 0) {
 			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
 					strerror(errno));
@@ -505,169 +492,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl,
 	return i;
 }
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-
-/*
- * Remaps all hugepages into single file segments
- */
-static int
-remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
-{
-	int fd;
-	unsigned i = 0, j, num_pages, page_idx = 0;
-	void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
-	size_t vma_len = 0;
-	size_t hugepage_sz = hpi->hugepage_sz;
-	size_t total_size, offset;
-	char filepath[MAX_HUGEPAGE_PATH];
-	phys_addr_t physaddr;
-	int socket;
-
-	while (i < hpi->num_pages[0]) {
-
-#ifndef RTE_ARCH_64
-		/* for 32-bit systems, don't remap 1G pages and 16G pages,
-		 * just reuse original map address as final map address.
-		 */
-		if ((hugepage_sz == RTE_PGSIZE_1G)
-			|| (hugepage_sz == RTE_PGSIZE_16G)) {
-			hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
-			hugepg_tbl[i].orig_va = NULL;
-			i++;
-			continue;
-		}
-#endif
-
-		/* reserve a virtual area for next contiguous
-		 * physical block: count the number of
-		 * contiguous physical pages. */
-		for (j = i+1; j < hpi->num_pages[0] ; j++) {
-#ifdef RTE_ARCH_PPC_64
-			/* The physical addresses are sorted in descending
-			 * order on PPC64 */
-			if (hugepg_tbl[j].physaddr !=
-				hugepg_tbl[j-1].physaddr - hugepage_sz)
-				break;
-#else
-			if (hugepg_tbl[j].physaddr !=
-				hugepg_tbl[j-1].physaddr + hugepage_sz)
-				break;
-#endif
-		}
-		num_pages = j - i;
-		vma_len = num_pages * hugepage_sz;
-
-		socket = hugepg_tbl[i].socket_id;
-
-		/* get the biggest virtual memory area up to
-		 * vma_len. If it fails, vma_addr is NULL, so
-		 * let the kernel provide the address. */
-		vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
-
-		/* If we can't find a big enough virtual area, work out how many pages
-		 * we are going to get */
-		if (vma_addr == NULL)
-			j = i + 1;
-		else if (vma_len != num_pages * hugepage_sz) {
-			num_pages = vma_len / hugepage_sz;
-			j = i + num_pages;
-
-		}
-
-		hugepg_tbl[page_idx].file_id = page_idx;
-		eal_get_hugefile_path(filepath,
-				sizeof(filepath),
-				hpi->hugedir,
-				hugepg_tbl[page_idx].file_id);
-
-		/* try to create hugepage file */
-		fd = open(filepath, O_CREAT | O_RDWR, 0755);
-		if (fd < 0) {
-			RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
-			return -1;
-		}
-
-		total_size = 0;
-		for (;i < j; i++) {
-
-			/* unmap current segment */
-			if (total_size > 0)
-				munmap(vma_addr, total_size);
-
-			/* unmap original page */
-			munmap(hugepg_tbl[i].orig_va, hugepage_sz);
-			unlink(hugepg_tbl[i].filepath);
-
-			total_size += hugepage_sz;
-
-			old_addr = vma_addr;
-
-			/* map new, bigger segment, and populate page tables,
-			 * the kernel fills this segment with zeros */
-			vma_addr = mmap(vma_addr, total_size,
-					PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0);
-
-			if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
-				RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
-				close(fd);
-				return -1;
-			}
-		}
-
-		/* set shared flock on the file. */
-		if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
-			RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n",
-				__func__, strerror(errno));
-			close(fd);
-			return -1;
-		}
-
-		snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
-				filepath);
-
-		physaddr = rte_mem_virt2phy(vma_addr);
-
-		if (physaddr == RTE_BAD_PHYS_ADDR)
-			return -1;
-
-		hugepg_tbl[page_idx].final_va = vma_addr;
-
-		hugepg_tbl[page_idx].physaddr = physaddr;
-
-		hugepg_tbl[page_idx].repeated = num_pages;
-
-		hugepg_tbl[page_idx].socket_id = socket;
-
-		close(fd);
-
-		/* verify the memory segment - that is, check that every VA corresponds
-		 * to the physical address we expect to see
-		 */
-		for (offset = 0; offset < vma_len; offset += hugepage_sz) {
-			uint64_t expected_physaddr;
-
-			expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
-			page_addr = RTE_PTR_ADD(vma_addr, offset);
-			physaddr = rte_mem_virt2phy(page_addr);
-
-			if (physaddr != expected_physaddr) {
-				RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
-						"at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
-						" (expected 0x%" PRIx64 ")\n",
-						page_addr, offset, physaddr, expected_physaddr);
-				return -1;
-			}
-		}
-
-		page_idx++;
-	}
-
-	/* zero out the rest */
-	memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
-	return page_idx;
-}
-#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
-
 /* Unmap all hugepages from original mapping */
 static int
 unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
@@ -681,7 +505,6 @@ unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info
         }
         return 0;
 }
-#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
 
 /*
  * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
@@ -875,12 +698,6 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 			for (page = 0; page < nrpages; page++) {
 				struct hugepage_file *hp = &hugepg_tbl[page];
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-				/* if this page was already cleared */
-				if (hp->final_va == NULL)
-					continue;
-#endif
-
 				/* find a page that matches the criteria */
 				if ((hp->size == hpi[size].hugepage_sz) &&
 						(hp->socket_id == (int) socket)) {
@@ -889,11 +706,7 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 					if (pages_found == hpi[size].num_pages[socket]) {
 						uint64_t unmap_len;
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-						unmap_len = hp->size * hp->repeated;
-#else
 						unmap_len = hp->size;
-#endif
 
 						/* get start addr and len of the remaining segment */
 						munmap(hp->final_va, (size_t) unmap_len);
@@ -904,50 +717,10 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
 									__func__, hp->filepath, strerror(errno));
 							return -1;
 						}
-					}
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-					/* else, check how much do we need to map */
-					else {
-						int nr_pg_left =
-								hpi[size].num_pages[socket] - pages_found;
-
-						/* if we need enough memory to fit into the segment */
-						if (hp->repeated <= nr_pg_left) {
-							pages_found += hp->repeated;
-						}
-						/* truncate the segment */
-						else {
-							uint64_t final_size = nr_pg_left * hp->size;
-							uint64_t seg_size = hp->repeated * hp->size;
-
-							void * unmap_va = RTE_PTR_ADD(hp->final_va,
-									final_size);
-							int fd;
-
-							munmap(unmap_va, seg_size - final_size);
-
-							fd = open(hp->filepath, O_RDWR);
-							if (fd < 0) {
-								RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
-										hp->filepath, strerror(errno));
-								return -1;
-							}
-							if (ftruncate(fd, final_size) < 0) {
-								RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
-										hp->filepath, strerror(errno));
-								return -1;
-							}
-							close(fd);
-
-							pages_found += nr_pg_left;
-							hp->repeated = nr_pg_left;
-						}
-					}
-#else
-					/* else, lock the page and skip */
-					else
+					} else {
+						/* lock the page and skip */
 						pages_found++;
-#endif
+					}
 
 				} /* match page */
 			} /* foreach page */
@@ -1177,9 +950,6 @@ rte_eal_hugepage_init(void)
 	int i, j, new_memseg;
 	int nr_hugefiles, nr_hugepages = 0;
 	void *addr;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-	int new_pages_count[MAX_HUGEPAGE_SIZES];
-#endif
 
 	test_proc_pagemap_readable();
 
@@ -1260,13 +1030,6 @@ rte_eal_hugepage_init(void)
 		pages_old = hpi->num_pages[0];
 		pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, 1);
 		if (pages_new < pages_old) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-			RTE_LOG(ERR, EAL,
-				"%d not %d hugepages of size %u MB allocated\n",
-				pages_new, pages_old,
-				(unsigned)(hpi->hugepage_sz / 0x100000));
-			goto fail;
-#else
 			RTE_LOG(DEBUG, EAL,
 				"%d not %d hugepages of size %u MB allocated\n",
 				pages_new, pages_old,
@@ -1278,7 +1041,6 @@ rte_eal_hugepage_init(void)
 			hpi->num_pages[0] = pages_new;
 			if (pages_new == 0)
 				continue;
-#endif
 		}
 
 		/* find physical addresses and sockets for each hugepage */
@@ -1297,18 +1059,6 @@ rte_eal_hugepage_init(void)
 		qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
 		      sizeof(struct hugepage_file), cmp_physaddr);
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-		/* remap all hugepages into single file segments */
-		new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
-		if (new_pages_count[i] < 0){
-			RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
-					(unsigned)(hpi->hugepage_sz / 0x100000));
-			goto fail;
-		}
-
-		/* we have processed a num of hugepages of this size, so inc offset */
-		hp_offset += new_pages_count[i];
-#else
 		/* remap all hugepages */
 		if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) !=
 		    hpi->num_pages[0]) {
@@ -1323,7 +1073,6 @@ rte_eal_hugepage_init(void)
 
 		/* we have processed a num of hugepages of this size, so inc offset */
 		hp_offset += hpi->num_pages[0];
-#endif
 	}
 
 	huge_recover_sigbus();
@@ -1331,14 +1080,7 @@ rte_eal_hugepage_init(void)
 	if (internal_config.memory == 0 && internal_config.force_sockets == 0)
 		internal_config.memory = eal_get_hugepage_mem_size();
 
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-	nr_hugefiles = 0;
-	for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
-		nr_hugefiles += new_pages_count[i];
-	}
-#else
 	nr_hugefiles = nr_hugepages;
-#endif
 
 
 	/* clean out the numbers of pages */
@@ -1356,12 +1098,7 @@ rte_eal_hugepage_init(void)
 		for (j = 0; j < nb_hpsizes; j++) {
 			if (tmp_hp[i].size ==
 					internal_config.hugepage_info[j].hugepage_sz) {
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-					internal_config.hugepage_info[j].num_pages[socket] +=
-						tmp_hp[i].repeated;
-#else
 				internal_config.hugepage_info[j].num_pages[socket]++;
-#endif
 			}
 		}
 	}
@@ -1436,15 +1173,8 @@ rte_eal_hugepage_init(void)
 	free(tmp_hp);
 	tmp_hp = NULL;
 
-	/* find earliest free memseg - this is needed because in case of IVSHMEM,
-	 * segments might have already been initialized */
-	for (j = 0; j < RTE_MAX_MEMSEG; j++)
-		if (mcfg->memseg[j].addr == NULL) {
-			/* move to previous segment and exit loop */
-			j--;
-			break;
-		}
-
+	/* first memseg index shall be 0 after incrementing it below */
+	j = -1;
 	for (i = 0; i < nr_hugefiles; i++) {
 		new_memseg = 0;
 
@@ -1482,11 +1212,7 @@ rte_eal_hugepage_init(void)
 
 			mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
 			mcfg->memseg[j].addr = hugepage[i].final_va;
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-			mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
-#else
 			mcfg->memseg[j].len = hugepage[i].size;
-#endif
 			mcfg->memseg[j].socket_id = hugepage[i].socket_id;
 			mcfg->memseg[j].hugepage_sz = hugepage[i].size;
 		}
@@ -1598,15 +1324,6 @@ rte_eal_hugepage_attach(void)
 		if (mcfg->memseg[s].len == 0)
 			break;
 
-#ifdef RTE_LIBRTE_IVSHMEM
-		/*
-		 * if segment has ioremap address set, it's an IVSHMEM segment and
-		 * doesn't need mapping as it was already mapped earlier
-		 */
-		if (mcfg->memseg[s].ioremap_addr != 0)
-			continue;
-#endif
-
 		/*
 		 * fdzero is mmapped to get a contiguous block of virtual
 		 * addresses of the appropriate memseg size.
@@ -1616,13 +1333,21 @@ rte_eal_hugepage_attach(void)
 				 PROT_READ, MAP_PRIVATE, fd_zero, 0);
 		if (base_addr == MAP_FAILED ||
 		    base_addr != mcfg->memseg[s].addr) {
-			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
-				"in /dev/zero to requested address [%p]: '%s'\n",
-				(unsigned long long)mcfg->memseg[s].len,
-				mcfg->memseg[s].addr, strerror(errno));
 			max_seg = s;
-			if (base_addr != MAP_FAILED)
+			if (base_addr != MAP_FAILED) {
+				/* errno is stale, don't use */
+				RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+					"in /dev/zero at [%p], got [%p] - "
+					"please use '--base-virtaddr' option\n",
+					(unsigned long long)mcfg->memseg[s].len,
+					mcfg->memseg[s].addr, base_addr);
 				munmap(base_addr, mcfg->memseg[s].len);
+			} else {
+				RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+					"in /dev/zero at [%p]: '%s'\n",
+					(unsigned long long)mcfg->memseg[s].len,
+					mcfg->memseg[s].addr, strerror(errno));
+			}
 			if (aslr_enabled() > 0) {
 				RTE_LOG(ERR, EAL, "It is recommended to "
 					"disable ASLR in the kernel "
@@ -1648,16 +1373,6 @@ rte_eal_hugepage_attach(void)
 		void *addr, *base_addr;
 		uintptr_t offset = 0;
 		size_t mapping_size;
-#ifdef RTE_LIBRTE_IVSHMEM
-		/*
-		 * if segment has ioremap address set, it's an IVSHMEM segment and
-		 * doesn't need mapping as it was already mapped earlier
-		 */
-		if (mcfg->memseg[s].ioremap_addr != 0) {
-			s++;
-			continue;
-		}
-#endif
 		/*
 		 * free previously mapped memory so we can map the
 		 * hugepages into the space
@@ -1676,11 +1391,7 @@ rte_eal_hugepage_attach(void)
 						hp[i].filepath);
 					goto error;
 				}
-#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
-				mapping_size = hp[i].size * hp[i].repeated;
-#else
 				mapping_size = hp[i].size;
-#endif
 				addr = mmap(RTE_PTR_ADD(base_addr, offset),
 						mapping_size, PROT_READ | PROT_WRITE,
 						MAP_SHARED, fd, 0);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index cd9de7cc..876ba381 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -350,13 +350,13 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 		 dirname);
 	if (access(filename, R_OK) != 0) {
 		/* if no NUMA support, set default to 0 */
-		dev->numa_node = 0;
+		dev->device.numa_node = 0;
 	} else {
 		if (eal_parse_sysfs_value(filename, &tmp) < 0) {
 			free(dev);
 			return -1;
 		}
-		dev->numa_node = tmp;
+		dev->device.numa_node = tmp;
 	}
 
 	/* parse resources */
@@ -390,6 +390,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 
 	/* device is valid, add in list (sorted) */
 	if (TAILQ_EMPTY(&pci_device_list)) {
+		rte_eal_device_insert(&dev->device);
 		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
 	} else {
 		struct rte_pci_device *dev2;
@@ -402,6 +403,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 
 			if (ret < 0) {
 				TAILQ_INSERT_BEFORE(dev2, dev, next);
+				rte_eal_device_insert(&dev->device);
 			} else { /* already registered */
 				dev2->kdrv = dev->kdrv;
 				dev2->max_vfs = dev->max_vfs;
@@ -411,12 +413,26 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 			}
 			return 0;
 		}
+		rte_eal_device_insert(&dev->device);
 		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
 	}
 
 	return 0;
 }
 
+int
+pci_update_device(const struct rte_pci_addr *addr)
+{
+	char filename[PATH_MAX];
+
+	snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT,
+		 pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
+		 addr->function);
+
+	return pci_scan_one(filename, addr->domain, addr->bus, addr->devid,
+				addr->function);
+}
+
 /*
  * split up a pci address into its constituent parts.
  */
@@ -743,9 +759,6 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
 int
 rte_eal_pci_init(void)
 {
-	TAILQ_INIT(&pci_driver_list);
-	TAILQ_INIT(&pci_device_list);
-
 	/* for debug purposes, PCI can be disabled */
 	if (internal_config.no_pci)
 		return 0;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 3dacbff8..d459bf48 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -82,6 +82,7 @@ struct rte_epoll_event {
 
 /** Handle for interrupts. */
 struct rte_intr_handle {
+	RTE_STD_C11
 	union {
 		int vfio_dev_fd;  /**< VFIO device file descriptor */
 		int uio_cfg_fd;  /**< UIO config file descriptor
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 2acdfd9b..09713b0c 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -61,6 +61,9 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
+#define RTE_STD_C11
+#else
+#include <rte_common.h>
 #endif
 
 /**
@@ -85,6 +88,7 @@ enum rte_kni_req_id {
  */
 struct rte_kni_request {
 	uint32_t req_id;             /**< Request id */
+	RTE_STD_C11
 	union {
 		uint32_t new_mtu;    /**< New MTU */
 		uint8_t if_up;       /**< 1: interface up, 0: interface down */
@@ -102,7 +106,7 @@ struct rte_kni_fifo {
 	volatile unsigned read;      /**< Next position to be read */
 	unsigned len;                /**< Circular buffer length */
 	unsigned elem_size;          /**< Pointer size - for 32/64 bit OS */
-	void * volatile buffer[0];   /**< The buffer contains mbuf pointers */
+	void *volatile buffer[];     /**< The buffer contains mbuf pointers */
 };
 
 /*
@@ -111,7 +115,8 @@ struct rte_kni_fifo {
  */
 struct rte_kni_mbuf {
 	void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
-	char pad0[10];
+	uint64_t buf_physaddr;
+	char pad0[2];
 	uint16_t data_off;      /**< Start address of data in segment buffer. */
 	char pad1[2];
 	uint8_t nb_segs;        /**< Number of segments. */
@@ -159,6 +164,7 @@ struct rte_kni_device_info {
 	uint16_t group_id;            /**< Group ID */
 	uint32_t core_id;             /**< core ID to bind for kernel thread */
 
+	__extension__
 	uint8_t force_bind : 1;       /**< Flag for kernel thread binding */
 
 	/* mbuf size */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index db8c9845..83721ba5 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -166,3 +166,15 @@ DPDK_16.07 {
 	rte_thread_setname;
 
 } DPDK_16.04;
+
+DPDK_16.11 {
+	global:
+
+	rte_delay_us_block;
+	rte_delay_us_callback_register;
+	rte_eal_dev_attach;
+	rte_eal_dev_detach;
+	rte_eal_vdrv_register;
+	rte_eal_vdrv_unregister;
+
+} DPDK_16.07;
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
index 8cc6b61c..4e99e07e 100644
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -76,14 +76,9 @@ SRCS-y += ethtool/igb/e1000_mbx.c
 SRCS-y += ethtool/igb/e1000_nvm.c
 SRCS-y += ethtool/igb/e1000_phy.c
 SRCS-y += ethtool/igb/igb_ethtool.c
-SRCS-y += ethtool/igb/igb_hwmon.c
 SRCS-y += ethtool/igb/igb_main.c
-SRCS-y += ethtool/igb/igb_debugfs.c
 SRCS-y += ethtool/igb/igb_param.c
-SRCS-y += ethtool/igb/igb_procfs.c
 SRCS-y += ethtool/igb/igb_vmdq.c
-#SRCS-y += ethtool/igb/igb_ptp.c
-#SRCS-y += ethtool/igb/kcompat.c
 
 SRCS-y += kni_misc.c
 SRCS-y += kni_net.c
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 647ba3ce..78da08e5 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -19,13 +19,25 @@
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
-#define sk_sleep(s) (s)->sk_sleep
+#define sk_sleep(s) ((s)->sk_sleep)
+#else
+#define HAVE_SOCKET_WQ
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+#define HAVE_STATIC_SOCK_MAP_FD
+#else
+#define kni_sock_map_fd(s) sock_map_fd(s, 0)
 #endif
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
 #define HAVE_CHANGE_CARRIER_CB
 #endif
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
 #define HAVE_IOV_ITER_MSGHDR
 #endif
@@ -35,6 +47,23 @@
 #define HAVE_REBUILD_HEADER
 #endif
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#define HAVE_SK_ALLOC_KERN_PARAM
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
 #define HAVE_TRANS_START_HELPER
 #endif
+
+/*
+ * KNI uses the NET_NAME_UNKNOWN macro to select the correct version of
+ * alloc_netdev(). Some old kernels backported the commit that defines the
+ * macro (685343fc3ba6) but still use the old API, so the macro must be
+ * undefined to select the correct API version. This is safe since KNI does
+ * not use the macro's value. This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#undef NET_NAME_UNKNOWN
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
deleted file mode 100644
index 5f297e5b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-
-"This software program is licensed subject to the GNU General Public License 
-(GPL). Version 2, June 1991, available at 
-<http://www.fsf.org/copyleft/gpl.html>"
-
-GNU General Public License 
-
-Version 2, June 1991
-
-Copyright (C) 1989, 1991 Free Software Foundation, Inc.  
-59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-The licenses for most software are designed to take away your freedom to 
-share and change it. By contrast, the GNU General Public License is intended
-to guarantee your freedom to share and change free software--to make sure 
-the software is free for all its users. This General Public License applies 
-to most of the Free Software Foundation's software and to any other program 
-whose authors commit to using it. (Some other Free Software Foundation 
-software is covered by the GNU Library General Public License instead.) You 
-can apply it to your programs, too.
-
-When we speak of free software, we are referring to freedom, not price. Our
-General Public Licenses are designed to make sure that you have the freedom 
-to distribute copies of free software (and charge for this service if you 
-wish), that you receive source code or can get it if you want it, that you 
-can change the software or use pieces of it in new free programs; and that 
-you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid anyone to 
-deny you these rights or to ask you to surrender the rights. These 
-restrictions translate to certain responsibilities for you if you distribute
-copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether gratis or 
-for a fee, you must give the recipients all the rights that you have. You 
-must make sure that they, too, receive or can get the source code. And you 
-must show them these terms so they know their rights.
- 
-We protect your rights with two steps: (1) copyright the software, and (2) 
-offer you this license which gives you legal permission to copy, distribute 
-and/or modify the software. 
-
-Also, for each author's protection and ours, we want to make certain that 
-everyone understands that there is no warranty for this free software. If 
-the software is modified by someone else and passed on, we want its 
-recipients to know that what they have is not the original, so that any 
-problems introduced by others will not reflect on the original authors' 
-reputations. 
-
-Finally, any free program is threatened constantly by software patents. We 
-wish to avoid the danger that redistributors of a free program will 
-individually obtain patent licenses, in effect making the program 
-proprietary. To prevent this, we have made it clear that any patent must be 
-licensed for everyone's free use or not licensed at all. 
-
-The precise terms and conditions for copying, distribution and modification 
-follow. 
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a notice
-   placed by the copyright holder saying it may be distributed under the 
-   terms of this General Public License. The "Program", below, refers to any
-   such program or work, and a "work based on the Program" means either the 
-   Program or any derivative work under copyright law: that is to say, a 
-   work containing the Program or a portion of it, either verbatim or with 
-   modifications and/or translated into another language. (Hereinafter, 
-   translation is included without limitation in the term "modification".) 
-   Each licensee is addressed as "you". 
-
-   Activities other than copying, distribution and modification are not 
-   covered by this License; they are outside its scope. The act of running 
-   the Program is not restricted, and the output from the Program is covered 
-   only if its contents constitute a work based on the Program (independent 
-   of having been made by running the Program). Whether that is true depends
-   on what the Program does. 
-
-1. You may copy and distribute verbatim copies of the Program's source code 
-   as you receive it, in any medium, provided that you conspicuously and 
-   appropriately publish on each copy an appropriate copyright notice and 
-   disclaimer of warranty; keep intact all the notices that refer to this 
-   License and to the absence of any warranty; and give any other recipients 
-   of the Program a copy of this License along with the Program. 
-
-   You may charge a fee for the physical act of transferring a copy, and you 
-   may at your option offer warranty protection in exchange for a fee. 
-
-2. You may modify your copy or copies of the Program or any portion of it, 
-   thus forming a work based on the Program, and copy and distribute such 
-   modifications or work under the terms of Section 1 above, provided that 
-   you also meet all of these conditions: 
-
-   * a) You must cause the modified files to carry prominent notices stating 
-        that you changed the files and the date of any change. 
-
-   * b) You must cause any work that you distribute or publish, that in 
-        whole or in part contains or is derived from the Program or any part 
-        thereof, to be licensed as a whole at no charge to all third parties
-        under the terms of this License. 
-
-   * c) If the modified program normally reads commands interactively when 
-        run, you must cause it, when started running for such interactive 
-        use in the most ordinary way, to print or display an announcement 
-        including an appropriate copyright notice and a notice that there is
-        no warranty (or else, saying that you provide a warranty) and that 
-        users may redistribute the program under these conditions, and 
-        telling the user how to view a copy of this License. (Exception: if 
-        the Program itself is interactive but does not normally print such 
-        an announcement, your work based on the Program is not required to 
-        print an announcement.) 
-
-   These requirements apply to the modified work as a whole. If identifiable 
-   sections of that work are not derived from the Program, and can be 
-   reasonably considered independent and separate works in themselves, then 
-   this License, and its terms, do not apply to those sections when you 
-   distribute them as separate works. But when you distribute the same 
-   sections as part of a whole which is a work based on the Program, the 
-   distribution of the whole must be on the terms of this License, whose 
-   permissions for other licensees extend to the entire whole, and thus to 
-   each and every part regardless of who wrote it. 
-
-   Thus, it is not the intent of this section to claim rights or contest 
-   your rights to work written entirely by you; rather, the intent is to 
-   exercise the right to control the distribution of derivative or 
-   collective works based on the Program. 
-
-   In addition, mere aggregation of another work not based on the Program 
-   with the Program (or with a work based on the Program) on a volume of a 
-   storage or distribution medium does not bring the other work under the 
-   scope of this License. 
-
-3. You may copy and distribute the Program (or a work based on it, under 
-   Section 2) in object code or executable form under the terms of Sections 
-   1 and 2 above provided that you also do one of the following: 
-
-   * a) Accompany it with the complete corresponding machine-readable source 
-        code, which must be distributed under the terms of Sections 1 and 2 
-        above on a medium customarily used for software interchange; or, 
-
-   * b) Accompany it with a written offer, valid for at least three years, 
-        to give any third party, for a charge no more than your cost of 
-        physically performing source distribution, a complete machine-
-        readable copy of the corresponding source code, to be distributed 
-        under the terms of Sections 1 and 2 above on a medium customarily 
-        used for software interchange; or, 
-
-   * c) Accompany it with the information you received as to the offer to 
-        distribute corresponding source code. (This alternative is allowed 
-        only for noncommercial distribution and only if you received the 
-        program in object code or executable form with such an offer, in 
-        accord with Subsection b above.) 
-
-   The source code for a work means the preferred form of the work for 
-   making modifications to it. For an executable work, complete source code 
-   means all the source code for all modules it contains, plus any 
-   associated interface definition files, plus the scripts used to control 
-   compilation and installation of the executable. However, as a special 
-   exception, the source code distributed need not include anything that is 
-   normally distributed (in either source or binary form) with the major 
-   components (compiler, kernel, and so on) of the operating system on which
-   the executable runs, unless that component itself accompanies the 
-   executable. 
-
-   If distribution of executable or object code is made by offering access 
-   to copy from a designated place, then offering equivalent access to copy 
-   the source code from the same place counts as distribution of the source 
-   code, even though third parties are not compelled to copy the source 
-   along with the object code. 
-
-4. You may not copy, modify, sublicense, or distribute the Program except as
-   expressly provided under this License. Any attempt otherwise to copy, 
-   modify, sublicense or distribute the Program is void, and will 
-   automatically terminate your rights under this License. However, parties 
-   who have received copies, or rights, from you under this License will not
-   have their licenses terminated so long as such parties remain in full 
-   compliance. 
-
-5. You are not required to accept this License, since you have not signed 
-   it. However, nothing else grants you permission to modify or distribute 
-   the Program or its derivative works. These actions are prohibited by law 
-   if you do not accept this License. Therefore, by modifying or 
-   distributing the Program (or any work based on the Program), you 
-   indicate your acceptance of this License to do so, and all its terms and
-   conditions for copying, distributing or modifying the Program or works 
-   based on it. 
-
-6. Each time you redistribute the Program (or any work based on the 
-   Program), the recipient automatically receives a license from the 
-   original licensor to copy, distribute or modify the Program subject to 
-   these terms and conditions. You may not impose any further restrictions 
-   on the recipients' exercise of the rights granted herein. You are not 
-   responsible for enforcing compliance by third parties to this License. 
-
-7. If, as a consequence of a court judgment or allegation of patent 
-   infringement or for any other reason (not limited to patent issues), 
-   conditions are imposed on you (whether by court order, agreement or 
-   otherwise) that contradict the conditions of this License, they do not 
-   excuse you from the conditions of this License. If you cannot distribute 
-   so as to satisfy simultaneously your obligations under this License and 
-   any other pertinent obligations, then as a consequence you may not 
-   distribute the Program at all. For example, if a patent license would 
-   not permit royalty-free redistribution of the Program by all those who 
-   receive copies directly or indirectly through you, then the only way you 
-   could satisfy both it and this License would be to refrain entirely from 
-   distribution of the Program. 
-
-   If any portion of this section is held invalid or unenforceable under any
-   particular circumstance, the balance of the section is intended to apply
-   and the section as a whole is intended to apply in other circumstances. 
-
-   It is not the purpose of this section to induce you to infringe any 
-   patents or other property right claims or to contest validity of any 
-   such claims; this section has the sole purpose of protecting the 
-   integrity of the free software distribution system, which is implemented 
-   by public license practices. Many people have made generous contributions
-   to the wide range of software distributed through that system in 
-   reliance on consistent application of that system; it is up to the 
-   author/donor to decide if he or she is willing to distribute software 
-   through any other system and a licensee cannot impose that choice. 
-
-   This section is intended to make thoroughly clear what is believed to be 
-   a consequence of the rest of this License. 
-
-8. If the distribution and/or use of the Program is restricted in certain 
-   countries either by patents or by copyrighted interfaces, the original 
-   copyright holder who places the Program under this License may add an 
-   explicit geographical distribution limitation excluding those countries, 
-   so that distribution is permitted only in or among countries not thus 
-   excluded. In such case, this License incorporates the limitation as if 
-   written in the body of this License. 
-
-9. The Free Software Foundation may publish revised and/or new versions of 
-   the General Public License from time to time. Such new versions will be 
-   similar in spirit to the present version, but may differ in detail to 
-   address new problems or concerns. 
-
-   Each version is given a distinguishing version number. If the Program 
-   specifies a version number of this License which applies to it and "any 
-   later version", you have the option of following the terms and 
-   conditions either of that version or of any later version published by 
-   the Free Software Foundation. If the Program does not specify a version 
-   number of this License, you may choose any version ever published by the 
-   Free Software Foundation. 
-
-10. If you wish to incorporate parts of the Program into other free programs
-    whose distribution conditions are different, write to the author to ask 
-    for permission. For software which is copyrighted by the Free Software 
-    Foundation, write to the Free Software Foundation; we sometimes make 
-    exceptions for this. Our decision will be guided by the two goals of 
-    preserving the free status of all derivatives of our free software and 
-    of promoting the sharing and reuse of software generally. 
-
-   NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 
-    FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 
-    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 
-    PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 
-    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 
-    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH 
-    YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL 
-    NECESSARY SERVICING, REPAIR OR CORRECTION. 
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 
-    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 
-    REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR 
-    DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL 
-    DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM 
-    (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED 
-    INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF 
-    THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR 
-    OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest 
-possible use to the public, the best way to achieve this is to make it free 
-software which everyone can redistribute and change under these terms. 
-
-To do so, attach the following notices to the program. It is safest to 
-attach them to the start of each source file to most effectively convey the
-exclusion of warranty; and each file should have at least the "copyright" 
-line and a pointer to where the full notice is found. 
-
-one line to give the program's name and an idea of what it does.
-Copyright (C) yyyy  name of author
-
-This program is free software; you can redistribute it and/or modify it 
-under the terms of the GNU General Public License as published by the Free 
-Software Foundation; either version 2 of the License, or (at your option) 
-any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT 
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 
-Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-Also add information on how to contact you by electronic and paper mail. 
-
-If the program is interactive, make it output a short notice like this when 
-it starts in an interactive mode: 
-
-Gnomovision version 69, Copyright (C) year name of author Gnomovision comes 
-with ABSOLUTELY NO WARRANTY; for details type 'show w'.  This is free 
-software, and you are welcome to redistribute it under certain conditions; 
-type 'show c' for details.
-
-The hypothetical commands 'show w' and 'show c' should show the appropriate 
-parts of the General Public License. Of course, the commands you use may be 
-called something other than 'show w' and 'show c'; they could even be 
-mouse-clicks or menu items--whatever suits your program. 
-
-You should also get your employer (if you work as a programmer) or your 
-school, if any, to sign a "copyright disclaimer" for the program, if 
-necessary. Here is a sample; alter the names: 
-
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program 
-'Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-signature of Ty Coon, 1 April 1989
-Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into 
-proprietary programs. If your program is a subroutine library, you may 
-consider it more useful to permit linking proprietary applications with the 
-library. If this is what you want to do, use the GNU Library General Public 
-License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
index b8c9a13f..d558af20 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
index 1aec75ab..185ccdf1 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
index 6095d3b4..220c9a40 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
index b21294ec..55c8a5f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
index 63b228c5..d42c7998 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
index 347cef71..35886e93 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
index 1e9f3e6e..7e4c20a9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
index 57b2eb56..b8fa70d0 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
index 4ee59ba9..74319def 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
index 6a1b0f52..3bcdd88c 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
index a1700398..51dfae5d 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
index c94b2185..0627f271 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
index 3ef0d98b..bd64429f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
index bbf838c8..64685d9d 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
index 6188d007..1ce59154 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
index fe62785a..17bc53c3 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
index d1cf98e2..c1ab60c4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
index 140a2a47..d8a77c45 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
index 5387c5e7..db24fb0b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
index 0e083c54..830ec991 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
index e5554ca3..d077b49e 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
deleted file mode 100644
index c07f9f53..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
index af7e68a5..d7a987d5 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
deleted file mode 100644
index 07a1ae07..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-#ifdef IGB_HWMON
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sysfs.h>
-#include <linux/kobject.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/hwmon.h>
-#include <linux/pci.h>
-
-#ifdef HAVE_I2C_SUPPORT
-static struct i2c_board_info i350_sensor_info = {
-	I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
-};
-#endif /* HAVE_I2C_SUPPORT */
-
-/* hwmon callback functions */
-static ssize_t igb_hwmon_show_location(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-						     dev_attr);
-	return sprintf(buf, "loc%u\n",
-		       igb_attr->sensor->location);
-}
-
-static ssize_t igb_hwmon_show_temp(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-						     dev_attr);
-	unsigned int value;
-
-	/* reset the temp field */
-	igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
-
-	value = igb_attr->sensor->temp;
-
-	/* display millidegree */
-	value *= 1000;
-
-	return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-						     dev_attr);
-	unsigned int value = igb_attr->sensor->caution_thresh;
-
-	/* display millidegree */
-	value *= 1000;
-
-	return sprintf(buf, "%u\n", value);
-}
-
-static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
-				     struct device_attribute *attr,
-				     char *buf)
-{
-	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
-						     dev_attr);
-	unsigned int value = igb_attr->sensor->max_op_thresh;
-
-	/* display millidegree */
-	value *= 1000;
-
-	return sprintf(buf, "%u\n", value);
-}
-
-/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
- * @ adapter: pointer to the adapter structure
- * @ offset: offset in the eeprom sensor data table
- * @ type: type of sensor data to display
- *
- * For each file we want in hwmon's sysfs interface we need a device_attribute
- * This is included in our hwmon_attr struct that contains the references to
- * the data structures we need to get the data to display.
- */
-static int igb_add_hwmon_attr(struct igb_adapter *adapter,
-				unsigned int offset, int type) {
-	int rc;
-	unsigned int n_attr;
-	struct hwmon_attr *igb_attr;
-
-	n_attr = adapter->igb_hwmon_buff.n_hwmon;
-	igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr];
-
-	switch (type) {
-	case IGB_HWMON_TYPE_LOC:
-		igb_attr->dev_attr.show = igb_hwmon_show_location;
-		snprintf(igb_attr->name, sizeof(igb_attr->name),
-			 "temp%u_label", offset);
-		break;
-	case IGB_HWMON_TYPE_TEMP:
-		igb_attr->dev_attr.show = igb_hwmon_show_temp;
-		snprintf(igb_attr->name, sizeof(igb_attr->name),
-			 "temp%u_input", offset);
-		break;
-	case IGB_HWMON_TYPE_CAUTION:
-		igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
-		snprintf(igb_attr->name, sizeof(igb_attr->name),
-			 "temp%u_max", offset);
-		break;
-	case IGB_HWMON_TYPE_MAX:
-		igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
-		snprintf(igb_attr->name, sizeof(igb_attr->name),
-			 "temp%u_crit", offset);
-		break;
-	default:
-		rc = -EPERM;
-		return rc;
-	}
-
-	/* These always the same regardless of type */
-	igb_attr->sensor =
-		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
-	igb_attr->hw = &adapter->hw;
-	igb_attr->dev_attr.store = NULL;
-	igb_attr->dev_attr.attr.mode = S_IRUGO;
-	igb_attr->dev_attr.attr.name = igb_attr->name;
-	sysfs_attr_init(&igb_attr->dev_attr.attr);
-	rc = device_create_file(&adapter->pdev->dev,
-				&igb_attr->dev_attr);
-	if (rc == 0)
-		++adapter->igb_hwmon_buff.n_hwmon;
-
-	return rc;
-}
-
-static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
-{
-	int i;
-
-	if (adapter == NULL)
-		return;
-
-	for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) {
-		device_remove_file(&adapter->pdev->dev,
-			   &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr);
-	}
-
-	kfree(adapter->igb_hwmon_buff.hwmon_list);
-
-	if (adapter->igb_hwmon_buff.device)
-		hwmon_device_unregister(adapter->igb_hwmon_buff.device);
-}
-
-/* called from igb_main.c */
-void igb_sysfs_exit(struct igb_adapter *adapter)
-{
-	igb_sysfs_del_adapter(adapter);
-}
-
-/* called from igb_main.c */
-int igb_sysfs_init(struct igb_adapter *adapter)
-{
-	struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff;
-	unsigned int i;
-	int n_attrs;
-	int rc = 0;
-#ifdef HAVE_I2C_SUPPORT
-	struct i2c_client *client = NULL;
-#endif /* HAVE_I2C_SUPPORT */
-
-	/* If this method isn't defined we don't support thermals */
-	if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
-		goto exit;
-
-	/* Don't create thermal hwmon interface if no sensors present */
-	rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
-		if (rc)
-			goto exit;
-#ifdef HAVE_I2C_SUPPORT
-	/* init i2c_client */
-	client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info);
-	if (client == NULL) {
-		dev_info(&adapter->pdev->dev,
-			"Failed to create new i2c device..\n");
-		goto exit;
-	}
-	adapter->i2c_client = client;
-#endif /* HAVE_I2C_SUPPORT */
-
-	/* Allocation space for max attributes
-	 * max num sensors * values (loc, temp, max, caution)
-	 */
-	n_attrs = E1000_MAX_SENSORS * 4;
-	igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr),
-					  GFP_KERNEL);
-	if (!igb_hwmon->hwmon_list) {
-		rc = -ENOMEM;
-		goto err;
-	}
-
-	igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev);
-	if (IS_ERR(igb_hwmon->device)) {
-		rc = PTR_ERR(igb_hwmon->device);
-		goto err;
-	}
-
-	for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
-		/* Only create hwmon sysfs entries for sensors that have
-		 * meaningful data.
-		 */
-		if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
-			continue;
-
-		/* Bail if any hwmon attr struct fails to initialize */
-		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
-		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
-		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
-		rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
-		if (rc)
-			goto err;
-	}
-
-	goto exit;
-
-err:
-	igb_sysfs_del_adapter(adapter);
-exit:
-	return rc;
-}
-#endif /* IGB_HWMON */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
index f1dcc95b..f4dca5a3 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -1562,6 +1562,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
 	connsw = E1000_READ_REG(hw, E1000_CONNSW);
 	link = igb_has_link(adapter);
+	(void) link;
 
 	/* need to live swap if current media is copper and we have fiber/serdes
 	 * to go to.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
index f79ce7c1..c922ca2f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
deleted file mode 100644
index 66236d29..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "e1000_82575.h"
-#include "e1000_hw.h"
-
-#ifdef IGB_PROCFS
-#ifndef IGB_HWMON
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-
-static struct proc_dir_entry *igb_top_dir = NULL;
-
-
-bool igb_thermal_present(struct igb_adapter *adapter)
-{
-	s32 status;
-	struct e1000_hw *hw;
-
-	if (adapter == NULL)
-		return false;
-	hw = &adapter->hw;
-
-	/*
-	 * Only set I2C bit-bang mode if an external thermal sensor is
-	 * supported on this device.
-	 */
-	if (adapter->ets) {
-		status = e1000_set_i2c_bb(hw);
-		if (status != E1000_SUCCESS)
-			return false;
-	}
-
-	status = hw->mac.ops.init_thermal_sensor_thresh(hw);
-	if (status != E1000_SUCCESS)
-		return false;
-
-	return true;
-}
-
-
-static int igb_macburn(char *page, char **start, off_t off, int count,
-			int *eof, void *data)
-{
-	struct e1000_hw *hw;
-	struct igb_adapter *adapter = (struct igb_adapter *)data;
-	if (adapter == NULL)
-		return snprintf(page, count, "error: no adapter\n");
-
-	hw = &adapter->hw;
-	if (hw == NULL)
-		return snprintf(page, count, "error: no hw data\n");
-
-	return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
-		       (unsigned int)hw->mac.perm_addr[0],
-		       (unsigned int)hw->mac.perm_addr[1],
-		       (unsigned int)hw->mac.perm_addr[2],
-		       (unsigned int)hw->mac.perm_addr[3],
-		       (unsigned int)hw->mac.perm_addr[4],
-		       (unsigned int)hw->mac.perm_addr[5]);
-}
-
-static int igb_macadmn(char *page, char **start, off_t off,
-		       int count, int *eof, void *data)
-{
-	struct e1000_hw *hw;
-	struct igb_adapter *adapter = (struct igb_adapter *)data;
-	if (adapter == NULL)
-		return snprintf(page, count, "error: no adapter\n");
-
-	hw = &adapter->hw;
-	if (hw == NULL)
-		return snprintf(page, count, "error: no hw data\n");
-
-	return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n",
-		       (unsigned int)hw->mac.addr[0],
-		       (unsigned int)hw->mac.addr[1],
-		       (unsigned int)hw->mac.addr[2],
-		       (unsigned int)hw->mac.addr[3],
-		       (unsigned int)hw->mac.addr[4],
-		       (unsigned int)hw->mac.addr[5]);
-}
-
-static int igb_numeports(char *page, char **start, off_t off, int count,
-			 int *eof, void *data)
-{
-	struct e1000_hw *hw;
-	int ports;
-	struct igb_adapter *adapter = (struct igb_adapter *)data;
-	if (adapter == NULL)
-		return snprintf(page, count, "error: no adapter\n");
-
-	hw = &adapter->hw;
-	if (hw == NULL)
-		return snprintf(page, count, "error: no hw data\n");
-
-	ports = 4;
-
-	return snprintf(page, count, "%d\n", ports);
-}
-
-static int igb_porttype(char *page, char **start, off_t off, int count,
-			int *eof, void *data)
-{
-	struct igb_adapter *adapter = (struct igb_adapter *)data;
-	if (adapter == NULL)
-		return snprintf(page, count, "error: no adapter\n");
-
-	return snprintf(page, count, "%d\n",
-			test_bit(__IGB_DOWN, &adapter->state));
-}
-
-static int igb_therm_location(char *page, char **start, off_t off,
-				     int count, int *eof, void *data)
-{
-	struct igb_therm_proc_data *therm_data =
-		(struct igb_therm_proc_data *)data;
-
-	if (therm_data == NULL)
-		return snprintf(page, count, "error: no therm_data\n");
-
-	return snprintf(page, count, "%d\n", therm_data->sensor_data->location);
-}
-
-static int igb_therm_maxopthresh(char *page, char **start, off_t off,
-				    int count, int *eof, void *data)
-{
-	struct igb_therm_proc_data *therm_data =
-		(struct igb_therm_proc_data *)data;
-
-	if (therm_data == NULL)
-		return snprintf(page, count, "error: no therm_data\n");
-
-	return snprintf(page, count, "%d\n",
-			therm_data->sensor_data->max_op_thresh);
-}
-
-static int igb_therm_cautionthresh(char *page, char **start, off_t off,
-				      int count, int *eof, void *data)
-{
-	struct igb_therm_proc_data *therm_data =
-		(struct igb_therm_proc_data *)data;
-
-	if (therm_data == NULL)
-		return snprintf(page, count, "error: no therm_data\n");
-
-	return snprintf(page, count, "%d\n",
-			therm_data->sensor_data->caution_thresh);
-}
-
-static int igb_therm_temp(char *page, char **start, off_t off,
-			     int count, int *eof, void *data)
-{
-	s32 status;
-	struct igb_therm_proc_data *therm_data =
-		(struct igb_therm_proc_data *)data;
-
-	if (therm_data == NULL)
-		return snprintf(page, count, "error: no therm_data\n");
-
-	status = e1000_get_thermal_sensor_data(therm_data->hw);
-	if (status != E1000_SUCCESS)
-		snprintf(page, count, "error: status %d returned\n", status);
-
-	return snprintf(page, count, "%d\n", therm_data->sensor_data->temp);
-}
-
-struct igb_proc_type{
-	char name[32];
-	int (*read)(char*, char**, off_t, int, int*, void*);
-};
-
-struct igb_proc_type igb_proc_entries[] = {
-	{"numeports", &igb_numeports},
-	{"porttype", &igb_porttype},
-	{"macburn", &igb_macburn},
-	{"macadmn", &igb_macadmn},
-	{"", NULL}
-};
-
-struct igb_proc_type igb_internal_entries[] = {
-	{"location", &igb_therm_location},
-	{"temp", &igb_therm_temp},
-	{"cautionthresh", &igb_therm_cautionthresh},
-	{"maxopthresh", &igb_therm_maxopthresh},
-	{"", NULL}
-};
-
-void igb_del_proc_entries(struct igb_adapter *adapter)
-{
-	int index, i;
-	char buf[16];	/* much larger than the sensor number will ever be */
-
-	if (igb_top_dir == NULL)
-		return;
-
-	for (i = 0; i < E1000_MAX_SENSORS; i++) {
-		if (adapter->therm_dir[i] == NULL)
-			continue;
-
-		for (index = 0; ; index++) {
-			if (igb_internal_entries[index].read == NULL)
-				break;
-
-			 remove_proc_entry(igb_internal_entries[index].name,
-					   adapter->therm_dir[i]);
-		}
-		snprintf(buf, sizeof(buf), "sensor_%d", i);
-		remove_proc_entry(buf, adapter->info_dir);
-	}
-
-	if (adapter->info_dir != NULL) {
-		for (index = 0; ; index++) {
-			if (igb_proc_entries[index].read == NULL)
-				break;
-		        remove_proc_entry(igb_proc_entries[index].name,
-					  adapter->info_dir);
-		}
-		remove_proc_entry("info", adapter->eth_dir);
-	}
-
-	if (adapter->eth_dir != NULL)
-		remove_proc_entry(pci_name(adapter->pdev), igb_top_dir);
-}
-
-/* called from igb_main.c */
-void igb_procfs_exit(struct igb_adapter *adapter)
-{
-	igb_del_proc_entries(adapter);
-}
-
-int igb_procfs_topdir_init(void)
-{
-	igb_top_dir = proc_mkdir("driver/igb", NULL);
-	if (igb_top_dir == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void igb_procfs_topdir_exit(void)
-{
-	remove_proc_entry("driver/igb", NULL);
-}
-
-/* called from igb_main.c */
-int igb_procfs_init(struct igb_adapter *adapter)
-{
-	int rc = 0;
-	int i;
-	int index;
-	char buf[16];	/* much larger than the sensor number will ever be */
-
-	adapter->eth_dir = NULL;
-	adapter->info_dir = NULL;
-	for (i = 0; i < E1000_MAX_SENSORS; i++)
-		adapter->therm_dir[i] = NULL;
-
-	if ( igb_top_dir == NULL ) {
-		rc = -ENOMEM;
-		goto fail;
-	}
-
-	adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir);
-	if (adapter->eth_dir == NULL) {
-		rc = -ENOMEM;
-		goto fail;
-	}
-
-	adapter->info_dir = proc_mkdir("info", adapter->eth_dir);
-	if (adapter->info_dir == NULL) {
-		rc = -ENOMEM;
-		goto fail;
-	}
-	for (index = 0; ; index++) {
-		if (igb_proc_entries[index].read == NULL) {
-			break;
-		}
-		if (!(create_proc_read_entry(igb_proc_entries[index].name,
-					   0444,
-					   adapter->info_dir,
-					   igb_proc_entries[index].read,
-					   adapter))) {
-
-			rc = -ENOMEM;
-			goto fail;
-		}
-	}
-	if (igb_thermal_present(adapter) == false)
-		goto exit;
-
-	for (i = 0; i < E1000_MAX_SENSORS; i++) {
-
-		 if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0)
-			continue;
-
-		snprintf(buf, sizeof(buf), "sensor_%d", i);
-		adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir);
-		if (adapter->therm_dir[i] == NULL) {
-			rc = -ENOMEM;
-			goto fail;
-		}
-		for (index = 0; ; index++) {
-			if (igb_internal_entries[index].read == NULL)
-				break;
-			/*
-			 * therm_data struct contains pointer the read func
-			 * will be needing
-			 */
-			adapter->therm_data[i].hw = &adapter->hw;
-			adapter->therm_data[i].sensor_data =
-				&adapter->hw.mac.thermal_sensor_data.sensor[i];
-
-			if (!(create_proc_read_entry(
-					   igb_internal_entries[index].name,
-					   0444,
-					   adapter->therm_dir[i],
-					   igb_internal_entries[index].read,
-					   &adapter->therm_data[i]))) {
-				rc = -ENOMEM;
-				goto fail;
-			}
-		}
-	}
-	goto exit;
-
-fail:
-	igb_del_proc_entries(adapter);
-exit:
-	return rc;
-}
-
-#endif /* !IGB_HWMON */
-#endif /* IGB_PROCFS */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
deleted file mode 100644
index 454b70ce..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c
+++ /dev/null
@@ -1,944 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/******************************************************************************
- Copyright(c) 2011 Richard Cochran <richardcochran@gmail.com> for some of the
- 82576 and 82580 code
-******************************************************************************/
-
-#include "igb.h"
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/ptp_classify.h>
-
-#define INCVALUE_MASK		0x7fffffff
-#define ISGN			0x80000000
-
-/*
- * The 82580 timesync updates the system timer every 8ns by 8ns,
- * and this update value cannot be reprogrammed.
- *
- * Neither the 82576 nor the 82580 offer registers wide enough to hold
- * nanoseconds time values for very long. For the 82580, SYSTIM always
- * counts nanoseconds, but the upper 24 bits are not available. The
- * frequency is adjusted by changing the 32 bit fractional nanoseconds
- * register, TIMINCA.
- *
- * For the 82576, the SYSTIM register time unit is affect by the
- * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
- * field are needed to provide the nominal 16 nanosecond period,
- * leaving 19 bits for fractional nanoseconds.
- *
- * We scale the NIC clock cycle by a large factor so that relatively
- * small clock corrections can be added or subtracted at each clock
- * tick. The drawbacks of a large factor are a) that the clock
- * register overflows more quickly (not such a big deal) and b) that
- * the increment per tick has to fit into 24 bits.  As a result we
- * need to use a shift of 19 so we can fit a value of 16 into the
- * TIMINCA register.
- *
- *
- *             SYSTIMH            SYSTIML
- *        +--------------+   +---+---+------+
- *  82576 |      32      |   | 8 | 5 |  19  |
- *        +--------------+   +---+---+------+
- *         \________ 45 bits _______/  fract
- *
- *        +----------+---+   +--------------+
- *  82580 |    24    | 8 |   |      32      |
- *        +----------+---+   +--------------+
- *          reserved  \______ 40 bits _____/
- *
- *
- * The 45 bit 82576 SYSTIM overflows every
- *   2^45 * 10^-9 / 3600 = 9.77 hours.
- *
- * The 40 bit 82580 SYSTIM overflows every
- *   2^40 * 10^-9 /  60  = 18.3 minutes.
- */
-
-#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 9)
-#define IGB_PTP_TX_TIMEOUT		(HZ * 15)
-#define INCPERIOD_82576			(1 << E1000_TIMINCA_16NS_SHIFT)
-#define INCVALUE_82576_MASK		((1 << E1000_TIMINCA_16NS_SHIFT) - 1)
-#define INCVALUE_82576			(16 << IGB_82576_TSYNC_SHIFT)
-#define IGB_NBITS_82580			40
-
-/*
- * SYSTIM read access for the 82576
- */
-
-static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc)
-{
-	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
-	struct e1000_hw *hw = &igb->hw;
-	u64 val;
-	u32 lo, hi;
-
-	lo = E1000_READ_REG(hw, E1000_SYSTIML);
-	hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-	val = ((u64) hi) << 32;
-	val |= lo;
-
-	return val;
-}
-
-/*
- * SYSTIM read access for the 82580
- */
-
-static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc)
-{
-	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
-	struct e1000_hw *hw = &igb->hw;
-	u64 val;
-	u32 lo, hi;
-
-	/* The timestamp latches on lowest register read. For the 82580
-	 * the lowest register is SYSTIMR instead of SYSTIML.  However we only
-	 * need to provide nanosecond resolution, so we just ignore it.
-	 */
-	E1000_READ_REG(hw, E1000_SYSTIMR);
-	lo = E1000_READ_REG(hw, E1000_SYSTIML);
-	hi = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-	val = ((u64) hi) << 32;
-	val |= lo;
-
-	return val;
-}
-
-/*
- * SYSTIM read access for I210/I211
- */
-
-static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	u32 sec, nsec;
-
-	/* The timestamp latches on lowest register read. For I210/I211, the
-	 * lowest register is SYSTIMR. Since we only need to provide nanosecond
-	 * resolution, we can ignore it.
-	 */
-	E1000_READ_REG(hw, E1000_SYSTIMR);
-	nsec = E1000_READ_REG(hw, E1000_SYSTIML);
-	sec = E1000_READ_REG(hw, E1000_SYSTIMH);
-
-	ts->tv_sec = sec;
-	ts->tv_nsec = nsec;
-}
-
-static void igb_ptp_write_i210(struct igb_adapter *adapter,
-			       const struct timespec *ts)
-{
-	struct e1000_hw *hw = &adapter->hw;
-
-	/*
-	 * Writing the SYSTIMR register is not necessary as it only provides
-	 * sub-nanosecond resolution.
-	 */
-	E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec);
-	E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec);
-}
-
-/**
- * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
- * @adapter: board private structure
- * @hwtstamps: timestamp structure to update
- * @systim: unsigned 64bit system time value.
- *
- * We need to convert the system time value stored in the RX/TXSTMP registers
- * into a hwtstamp which can be used by the upper level timestamping functions.
- *
- * The 'tmreg_lock' spinlock is used to protect the consistency of the
- * system time value. This is needed because reading the 64 bit time
- * value involves reading two (or three) 32 bit registers. The first
- * read latches the value. Ditto for writing.
- *
- * In addition, here have extended the system time with an overflow
- * counter in software.
- **/
-static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
-				       struct skb_shared_hwtstamps *hwtstamps,
-				       u64 systim)
-{
-	unsigned long flags;
-	u64 ns;
-
-	switch (adapter->hw.mac.type) {
-	case e1000_82576:
-	case e1000_82580:
-	case e1000_i350:
-	case e1000_i354:
-		spin_lock_irqsave(&adapter->tmreg_lock, flags);
-
-		ns = timecounter_cyc2time(&adapter->tc, systim);
-
-		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
-
-		memset(hwtstamps, 0, sizeof(*hwtstamps));
-		hwtstamps->hwtstamp = ns_to_ktime(ns);
-		break;
-	case e1000_i210:
-	case e1000_i211:
-		memset(hwtstamps, 0, sizeof(*hwtstamps));
-		/* Upper 32 bits contain s, lower 32 bits contain ns. */
-		hwtstamps->hwtstamp = ktime_set(systim >> 32,
-						systim & 0xFFFFFFFF);
-		break;
-	default:
-		break;
-	}
-}
-
-/*
- * PTP clock operations
- */
-
-static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	struct e1000_hw *hw = &igb->hw;
-	int neg_adj = 0;
-	u64 rate;
-	u32 incvalue;
-
-	if (ppb < 0) {
-		neg_adj = 1;
-		ppb = -ppb;
-	}
-	rate = ppb;
-	rate <<= 14;
-	rate = div_u64(rate, 1953125);
-
-	incvalue = 16 << IGB_82576_TSYNC_SHIFT;
-
-	if (neg_adj)
-		incvalue -= rate;
-	else
-		incvalue += rate;
-
-	E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
-
-	return 0;
-}
-
-static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	struct e1000_hw *hw = &igb->hw;
-	int neg_adj = 0;
-	u64 rate;
-	u32 inca;
-
-	if (ppb < 0) {
-		neg_adj = 1;
-		ppb = -ppb;
-	}
-	rate = ppb;
-	rate <<= 26;
-	rate = div_u64(rate, 1953125);
-
-	/* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x
-	 * as quickly. Account for this by dividing the adjustment by 2.5.
-	 */
-	if (hw->mac.type == e1000_i354) {
-		u32 status = E1000_READ_REG(hw, E1000_STATUS);
-
-		if ((status & E1000_STATUS_2P5_SKU) &&
-		    !(status & E1000_STATUS_2P5_SKU_OVER)) {
-			rate <<= 1;
-			rate = div_u64(rate, 5);
-		}
-	}
-
-	inca = rate & INCVALUE_MASK;
-	if (neg_adj)
-		inca |= ISGN;
-
-	E1000_WRITE_REG(hw, E1000_TIMINCA, inca);
-
-	return 0;
-}
-
-static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-	s64 now;
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	now = timecounter_read(&igb->tc);
-	now += delta;
-	timecounter_init(&igb->tc, &igb->cc, now);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	return 0;
-}
-
-static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-	struct timespec now, then = ns_to_timespec(delta);
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	igb_ptp_read_i210(igb, &now);
-	now = timespec_add(now, then);
-	igb_ptp_write_i210(igb, (const struct timespec *)&now);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	return 0;
-}
-
-static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp,
-				 struct timespec *ts)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-	u64 ns;
-	u32 remainder;
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	ns = timecounter_read(&igb->tc);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
-	ts->tv_nsec = remainder;
-
-	return 0;
-}
-
-static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp,
-				struct timespec *ts)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	igb_ptp_read_i210(igb, ts);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	return 0;
-}
-
-static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
-				 const struct timespec *ts)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-	u64 ns;
-
-	ns = ts->tv_sec * 1000000000ULL;
-	ns += ts->tv_nsec;
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	timecounter_init(&igb->tc, &igb->cc, ns);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	return 0;
-}
-
-static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
-				const struct timespec *ts)
-{
-	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
-					       ptp_caps);
-	unsigned long flags;
-
-	spin_lock_irqsave(&igb->tmreg_lock, flags);
-
-	igb_ptp_write_i210(igb, ts);
-
-	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
-
-	return 0;
-}
-
-static int igb_ptp_enable(struct ptp_clock_info *ptp,
-			  struct ptp_clock_request *rq, int on)
-{
-	return -EOPNOTSUPP;
-}
-
-/**
- * igb_ptp_tx_work
- * @work: pointer to work struct
- *
- * This work function polls the TSYNCTXCTL valid bit to determine when a
- * timestamp has been taken for the current stored skb.
- */
-void igb_ptp_tx_work(struct work_struct *work)
-{
-	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
-						   ptp_tx_work);
-	struct e1000_hw *hw = &adapter->hw;
-	u32 tsynctxctl;
-
-	if (!adapter->ptp_tx_skb)
-		return;
-
-	if (time_is_before_jiffies(adapter->ptp_tx_start +
-				   IGB_PTP_TX_TIMEOUT)) {
-		dev_kfree_skb_any(adapter->ptp_tx_skb);
-		adapter->ptp_tx_skb = NULL;
-		adapter->tx_hwtstamp_timeouts++;
-		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang");
-		return;
-	}
-
-	tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
-	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
-		igb_ptp_tx_hwtstamp(adapter);
-	else
-		/* reschedule to check later */
-		schedule_work(&adapter->ptp_tx_work);
-}
-
-static void igb_ptp_overflow_check(struct work_struct *work)
-{
-	struct igb_adapter *igb =
-		container_of(work, struct igb_adapter, ptp_overflow_work.work);
-	struct timespec ts;
-
-	igb->ptp_caps.gettime(&igb->ptp_caps, &ts);
-
-	pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-
-	schedule_delayed_work(&igb->ptp_overflow_work,
-			      IGB_SYSTIM_OVERFLOW_PERIOD);
-}
-
-/**
- * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
- * @adapter: private network adapter structure
- *
- * This watchdog task is scheduled to detect error case where hardware has
- * dropped an Rx packet that was timestamped when the ring is full. The
- * particular error is rare but leaves the device in a state unable to timestamp
- * any future packets.
- */
-void igb_ptp_rx_hang(struct igb_adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	struct igb_ring *rx_ring;
-	u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
-	unsigned long rx_event;
-	int n;
-
-	if (hw->mac.type != e1000_82576)
-		return;
-
-	/* If we don't have a valid timestamp in the registers, just update the
-	 * timeout counter and exit
-	 */
-	if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
-		adapter->last_rx_ptp_check = jiffies;
-		return;
-	}
-
-	/* Determine the most recent watchdog or rx_timestamp event */
-	rx_event = adapter->last_rx_ptp_check;
-	for (n = 0; n < adapter->num_rx_queues; n++) {
-		rx_ring = adapter->rx_ring[n];
-		if (time_after(rx_ring->last_rx_timestamp, rx_event))
-			rx_event = rx_ring->last_rx_timestamp;
-	}
-
-	/* Only need to read the high RXSTMP register to clear the lock */
-	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
-		E1000_READ_REG(hw, E1000_RXSTMPH);
-		adapter->last_rx_ptp_check = jiffies;
-		adapter->rx_hwtstamp_cleared++;
-		dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang");
-	}
-}
-
-/**
- * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
- * @adapter: Board private structure.
- *
- * If we were asked to do hardware stamping and such a time stamp is
- * available, then it must have been for this skb here because we only
- * allow only one such packet into the queue.
- */
-void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	struct skb_shared_hwtstamps shhwtstamps;
-	u64 regval;
-
-	regval = E1000_READ_REG(hw, E1000_TXSTMPL);
-	regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
-
-	igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
-	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
-	dev_kfree_skb_any(adapter->ptp_tx_skb);
-	adapter->ptp_tx_skb = NULL;
-}
-
-/**
- * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
- * @q_vector: Pointer to interrupt specific structure
- * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the first buffer of an
- * incoming frame.  The value is stored in little endian format starting on
- * byte 8.
- */
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
-			 unsigned char *va,
-			 struct sk_buff *skb)
-{
-	__le64 *regval = (__le64 *)va;
-
-	/*
-	 * The timestamp is recorded in little endian format.
-	 * DWORD: 0        1        2        3
-	 * Field: Reserved Reserved SYSTIML  SYSTIMH
-	 */
-	igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
-				   le64_to_cpu(regval[1]));
-}
-
-/**
- * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
- * @q_vector: Pointer to interrupt specific structure
- * @skb: Buffer containing timestamp and packet
- *
- * This function is meant to retrieve a timestamp from the internal registers
- * of the adapter and store it in the skb.
- */
-void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
-			 struct sk_buff *skb)
-{
-	struct igb_adapter *adapter = q_vector->adapter;
-	struct e1000_hw *hw = &adapter->hw;
-	u64 regval;
-
-	/*
-	 * If this bit is set, then the RX registers contain the time stamp. No
-	 * other packet will be time stamped until we read these registers, so
-	 * read the registers to make them available again. Because only one
-	 * packet can be time stamped at a time, we know that the register
-	 * values must belong to this one here and therefore we don't need to
-	 * compare any of the additional attributes stored for it.
-	 *
-	 * If nothing went wrong, then it should have a shared tx_flags that we
-	 * can turn into a skb_shared_hwtstamps.
-	 */
-	if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
-		return;
-
-	regval = E1000_READ_REG(hw, E1000_RXSTMPL);
-	regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
-
-	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
-}
-
-/**
- * igb_ptp_hwtstamp_ioctl - control hardware time stamping
- * @netdev:
- * @ifreq:
- * @cmd:
- *
- * Outgoing time stamping can be enabled and disabled. Play nice and
- * disable it when requested, although it shouldn't case any overhead
- * when no packet needs it. At most one packet in the queue may be
- * marked for time stamping, otherwise it would be impossible to tell
- * for sure to which packet the hardware time stamp belongs.
- *
- * Incoming time stamping has to be configured via the hardware
- * filters. Not all combinations are supported, in particular event
- * type has to be specified. Matching the kind of event packet is
- * not supported, with the exception of "all V2 events regardless of
- * level 2 or 4".
- *
- **/
-int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
-			   struct ifreq *ifr, int cmd)
-{
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct e1000_hw *hw = &adapter->hw;
-	struct hwtstamp_config config;
-	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
-	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-	u32 tsync_rx_cfg = 0;
-	bool is_l4 = false;
-	bool is_l2 = false;
-	u32 regval;
-
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
-	/* reserved for future extensions */
-	if (config.flags)
-		return -EINVAL;
-
-	switch (config.tx_type) {
-	case HWTSTAMP_TX_OFF:
-		tsync_tx_ctl = 0;
-	case HWTSTAMP_TX_ON:
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	switch (config.rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-		tsync_rx_ctl = 0;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
-		is_l4 = true;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
-		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
-		is_l4 = true;
-		break;
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
-		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-		is_l2 = true;
-		is_l4 = true;
-		break;
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_ALL:
-		/*
-		 * 82576 cannot timestamp all packets, which it needs to do to
-		 * support both V1 Sync and Delay_Req messages
-		 */
-		if (hw->mac.type != e1000_82576) {
-			tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-			config.rx_filter = HWTSTAMP_FILTER_ALL;
-			break;
-		}
-		/* fall through */
-	default:
-		config.rx_filter = HWTSTAMP_FILTER_NONE;
-		return -ERANGE;
-	}
-
-	if (hw->mac.type == e1000_82575) {
-		if (tsync_rx_ctl | tsync_tx_ctl)
-			return -EINVAL;
-		return 0;
-	}
-
-	/*
-	 * Per-packet timestamping only works if all packets are
-	 * timestamped, so enable timestamping in all packets as
-	 * long as one rx filter was configured.
-	 */
-	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
-		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
-		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
-		config.rx_filter = HWTSTAMP_FILTER_ALL;
-		is_l2 = true;
-		is_l4 = true;
-
-		if ((hw->mac.type == e1000_i210) ||
-		    (hw->mac.type == e1000_i211)) {
-			regval = E1000_READ_REG(hw, E1000_RXPBS);
-			regval |= E1000_RXPBS_CFG_TS_EN;
-			E1000_WRITE_REG(hw, E1000_RXPBS, regval);
-		}
-	}
-
-	/* enable/disable TX */
-	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
-	regval &= ~E1000_TSYNCTXCTL_ENABLED;
-	regval |= tsync_tx_ctl;
-	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
-
-	/* enable/disable RX */
-	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
-	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
-	regval |= tsync_rx_ctl;
-	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
-
-	/* define which PTP packets are time stamped */
-	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
-
-	/* define ethertype filter for timestamped packets */
-	if (is_l2)
-		E1000_WRITE_REG(hw, E1000_ETQF(3),
-		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
-		      E1000_ETQF_1588 | /* enable timestamping */
-		      ETH_P_1588));     /* 1588 eth protocol type */
-	else
-		E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
-
-	/* L4 Queue Filter[3]: filter by destination port and protocol */
-	if (is_l4) {
-		u32 ftqf = (IPPROTO_UDP /* UDP */
-			| E1000_FTQF_VF_BP /* VF not compared */
-			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
-			| E1000_FTQF_MASK); /* mask all inputs */
-		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
-
-		E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT));
-		E1000_WRITE_REG(hw, E1000_IMIREXT(3),
-		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
-		if (hw->mac.type == e1000_82576) {
-			/* enable source port check */
-			E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT));
-			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
-		}
-		E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
-	} else {
-		E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
-	}
-	E1000_WRITE_FLUSH(hw);
-
-	/* clear TX/RX time stamp registers, just to be sure */
-	regval = E1000_READ_REG(hw, E1000_TXSTMPL);
-	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
-	regval = E1000_READ_REG(hw, E1000_RXSTMPL);
-	regval = E1000_READ_REG(hw, E1000_RXSTMPH);
-
-	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
-		-EFAULT : 0;
-}
-
-void igb_ptp_init(struct igb_adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	struct net_device *netdev = adapter->netdev;
-
-	switch (hw->mac.type) {
-	case e1000_82576:
-		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-		adapter->ptp_caps.owner = THIS_MODULE;
-		adapter->ptp_caps.max_adj = 999999881;
-		adapter->ptp_caps.n_ext_ts = 0;
-		adapter->ptp_caps.pps = 0;
-		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576;
-		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
-		adapter->ptp_caps.settime = igb_ptp_settime_82576;
-		adapter->ptp_caps.enable = igb_ptp_enable;
-		adapter->cc.read = igb_ptp_read_82576;
-		adapter->cc.mask = CLOCKSOURCE_MASK(64);
-		adapter->cc.mult = 1;
-		adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
-		/* Dial the nominal frequency. */
-		E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
-						   INCVALUE_82576);
-		break;
-	case e1000_82580:
-	case e1000_i350:
-	case e1000_i354:
-		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-		adapter->ptp_caps.owner = THIS_MODULE;
-		adapter->ptp_caps.max_adj = 62499999;
-		adapter->ptp_caps.n_ext_ts = 0;
-		adapter->ptp_caps.pps = 0;
-		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
-		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
-		adapter->ptp_caps.gettime = igb_ptp_gettime_82576;
-		adapter->ptp_caps.settime = igb_ptp_settime_82576;
-		adapter->ptp_caps.enable = igb_ptp_enable;
-		adapter->cc.read = igb_ptp_read_82580;
-		adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
-		adapter->cc.mult = 1;
-		adapter->cc.shift = 0;
-		/* Enable the timer functions by clearing bit 31. */
-		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-		break;
-	case e1000_i210:
-	case e1000_i211:
-		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
-		adapter->ptp_caps.owner = THIS_MODULE;
-		adapter->ptp_caps.max_adj = 62499999;
-		adapter->ptp_caps.n_ext_ts = 0;
-		adapter->ptp_caps.pps = 0;
-		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
-		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
-		adapter->ptp_caps.gettime = igb_ptp_gettime_i210;
-		adapter->ptp_caps.settime = igb_ptp_settime_i210;
-		adapter->ptp_caps.enable = igb_ptp_enable;
-		/* Enable the timer functions by clearing bit 31. */
-		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-		break;
-	default:
-		adapter->ptp_clock = NULL;
-		return;
-	}
-
-	E1000_WRITE_FLUSH(hw);
-
-	spin_lock_init(&adapter->tmreg_lock);
-	INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
-	/* Initialize the clock and overflow work for devices that need it. */
-	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
-		struct timespec ts = ktime_to_timespec(ktime_get_real());
-
-		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
-	} else {
-		timecounter_init(&adapter->tc, &adapter->cc,
-				 ktime_to_ns(ktime_get_real()));
-
-		INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
-				  igb_ptp_overflow_check);
-
-		schedule_delayed_work(&adapter->ptp_overflow_work,
-				      IGB_SYSTIM_OVERFLOW_PERIOD);
-	}
-
-	/* Initialize the time sync interrupts for devices that support it. */
-	if (hw->mac.type >= e1000_82580) {
-		E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
-		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
-	}
-
-	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
-						&adapter->pdev->dev);
-	if (IS_ERR(adapter->ptp_clock)) {
-		adapter->ptp_clock = NULL;
-		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
-	} else {
-		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
-			 adapter->netdev->name);
-		adapter->flags |= IGB_FLAG_PTP;
-	}
-}
-
-/**
- * igb_ptp_stop - Disable PTP device and stop the overflow check.
- * @adapter: Board private structure.
- *
- * This function stops the PTP support and cancels the delayed work.
- **/
-void igb_ptp_stop(struct igb_adapter *adapter)
-{
-	switch (adapter->hw.mac.type) {
-	case e1000_82576:
-	case e1000_82580:
-	case e1000_i350:
-	case e1000_i354:
-		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
-		break;
-	case e1000_i210:
-	case e1000_i211:
-		/* No delayed work to cancel. */
-		break;
-	default:
-		return;
-	}
-
-	cancel_work_sync(&adapter->ptp_tx_work);
-	if (adapter->ptp_tx_skb) {
-		dev_kfree_skb_any(adapter->ptp_tx_skb);
-		adapter->ptp_tx_skb = NULL;
-	}
-
-	if (adapter->ptp_clock) {
-		ptp_clock_unregister(adapter->ptp_clock);
-		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
-			 adapter->netdev->name);
-		adapter->flags &= ~IGB_FLAG_PTP;
-	}
-}
-
-/**
- * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
- * @adapter: Board private structure.
- *
- * This function handles the reset work required to re-enable the PTP device.
- **/
-void igb_ptp_reset(struct igb_adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-
-	if (!(adapter->flags & IGB_FLAG_PTP))
-		return;
-
-	switch (adapter->hw.mac.type) {
-	case e1000_82576:
-		/* Dial the nominal frequency. */
-		E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 |
-						   INCVALUE_82576);
-		break;
-	case e1000_82580:
-	case e1000_i350:
-	case e1000_i354:
-	case e1000_i210:
-	case e1000_i211:
-		/* Enable the timer functions and interrupts. */
-		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
-		E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS);
-		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS);
-		break;
-	default:
-		/* No work to do. */
-		return;
-	}
-
-	/* Re-initialize the timer. */
-	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
-		struct timespec ts = ktime_to_timespec(ktime_get_real());
-
-		igb_ptp_settime_i210(&adapter->ptp_caps, &ts);
-	} else {
-		timecounter_init(&adapter->tc, &adapter->cc,
-				 ktime_to_ns(ktime_get_real()));
-	}
-}
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
index 18da64a3..9d49b45e 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
index 015c8952..205da562 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
index e51e7c4e..c6d4c568 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
deleted file mode 100644
index bde3a83c..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c
+++ /dev/null
@@ -1,1482 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-#include "igb.h"
-#include "kcompat.h"
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) )
-/* From lib/vsprintf.c */
-#include <asm/div64.h>
-
-static int skip_atoi(const char **s)
-{
-	int i=0;
-
-	while (isdigit(**s))
-		i = i*10 + *((*s)++) - '0';
-	return i;
-}
-
-#define _kc_ZEROPAD	1		/* pad with zero */
-#define _kc_SIGN	2		/* unsigned/signed long */
-#define _kc_PLUS	4		/* show plus */
-#define _kc_SPACE	8		/* space if plus */
-#define _kc_LEFT	16		/* left justified */
-#define _kc_SPECIAL	32		/* 0x */
-#define _kc_LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
-
-static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type)
-{
-	char c,sign,tmp[66];
-	const char *digits;
-	const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-	const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-	int i;
-
-	digits = (type & _kc_LARGE) ? large_digits : small_digits;
-	if (type & _kc_LEFT)
-		type &= ~_kc_ZEROPAD;
-	if (base < 2 || base > 36)
-		return 0;
-	c = (type & _kc_ZEROPAD) ? '0' : ' ';
-	sign = 0;
-	if (type & _kc_SIGN) {
-		if (num < 0) {
-			sign = '-';
-			num = -num;
-			size--;
-		} else if (type & _kc_PLUS) {
-			sign = '+';
-			size--;
-		} else if (type & _kc_SPACE) {
-			sign = ' ';
-			size--;
-		}
-	}
-	if (type & _kc_SPECIAL) {
-		if (base == 16)
-			size -= 2;
-		else if (base == 8)
-			size--;
-	}
-	i = 0;
-	if (num == 0)
-		tmp[i++]='0';
-	else while (num != 0)
-		tmp[i++] = digits[do_div(num,base)];
-	if (i > precision)
-		precision = i;
-	size -= precision;
-	if (!(type&(_kc_ZEROPAD+_kc_LEFT))) {
-		while(size-->0) {
-			if (buf <= end)
-				*buf = ' ';
-			++buf;
-		}
-	}
-	if (sign) {
-		if (buf <= end)
-			*buf = sign;
-		++buf;
-	}
-	if (type & _kc_SPECIAL) {
-		if (base==8) {
-			if (buf <= end)
-				*buf = '0';
-			++buf;
-		} else if (base==16) {
-			if (buf <= end)
-				*buf = '0';
-			++buf;
-			if (buf <= end)
-				*buf = digits[33];
-			++buf;
-		}
-	}
-	if (!(type & _kc_LEFT)) {
-		while (size-- > 0) {
-			if (buf <= end)
-				*buf = c;
-			++buf;
-		}
-	}
-	while (i < precision--) {
-		if (buf <= end)
-			*buf = '0';
-		++buf;
-	}
-	while (i-- > 0) {
-		if (buf <= end)
-			*buf = tmp[i];
-		++buf;
-	}
-	while (size-- > 0) {
-		if (buf <= end)
-			*buf = ' ';
-		++buf;
-	}
-	return buf;
-}
-
-int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
-{
-	int len;
-	unsigned long long num;
-	int i, base;
-	char *str, *end, c;
-	const char *s;
-
-	int flags;		/* flags to number() */
-
-	int field_width;	/* width of output field */
-	int precision;		/* min. # of digits for integers; max
-				   number of chars for from string */
-	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
-				/* 'z' support added 23/7/1999 S.H.    */
-				/* 'z' changed to 'Z' --davidm 1/25/99 */
-
-	str = buf;
-	end = buf + size - 1;
-
-	if (end < buf - 1) {
-		end = ((void *) -1);
-		size = end - buf + 1;
-	}
-
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str <= end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
-
-		/* process flags */
-		flags = 0;
-		repeat:
-			++fmt;		/* this also skips first '%' */
-			switch (*fmt) {
-				case '-': flags |= _kc_LEFT; goto repeat;
-				case '+': flags |= _kc_PLUS; goto repeat;
-				case ' ': flags |= _kc_SPACE; goto repeat;
-				case '#': flags |= _kc_SPECIAL; goto repeat;
-				case '0': flags |= _kc_ZEROPAD; goto repeat;
-			}
-
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = va_arg(args, int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= _kc_LEFT;
-			}
-		}
-
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = va_arg(args, int);
-			}
-			if (precision < 0)
-				precision = 0;
-		}
-
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
-			qualifier = *fmt;
-			++fmt;
-		}
-
-		/* default base */
-		base = 10;
-
-		switch (*fmt) {
-			case 'c':
-				if (!(flags & _kc_LEFT)) {
-					while (--field_width > 0) {
-						if (str <= end)
-							*str = ' ';
-						++str;
-					}
-				}
-				c = (unsigned char) va_arg(args, int);
-				if (str <= end)
-					*str = c;
-				++str;
-				while (--field_width > 0) {
-					if (str <= end)
-						*str = ' ';
-					++str;
-				}
-				continue;
-
-			case 's':
-				s = va_arg(args, char *);
-				if (!s)
-					s = "<NULL>";
-
-				len = strnlen(s, precision);
-
-				if (!(flags & _kc_LEFT)) {
-					while (len < field_width--) {
-						if (str <= end)
-							*str = ' ';
-						++str;
-					}
-				}
-				for (i = 0; i < len; ++i) {
-					if (str <= end)
-						*str = *s;
-					++str; ++s;
-				}
-				while (len < field_width--) {
-					if (str <= end)
-						*str = ' ';
-					++str;
-				}
-				continue;
-
-			case 'p':
-				if (field_width == -1) {
-					field_width = 2*sizeof(void *);
-					flags |= _kc_ZEROPAD;
-				}
-				str = number(str, end,
-						(unsigned long) va_arg(args, void *),
-						16, field_width, precision, flags);
-				continue;
-
-
-			case 'n':
-				/* FIXME:
-				* What does C99 say about the overflow case here? */
-				if (qualifier == 'l') {
-					long * ip = va_arg(args, long *);
-					*ip = (str - buf);
-				} else if (qualifier == 'Z') {
-					size_t * ip = va_arg(args, size_t *);
-					*ip = (str - buf);
-				} else {
-					int * ip = va_arg(args, int *);
-					*ip = (str - buf);
-				}
-				continue;
-
-			case '%':
-				if (str <= end)
-					*str = '%';
-				++str;
-				continue;
-
-				/* integer number formats - set up the flags and "break" */
-			case 'o':
-				base = 8;
-				break;
-
-			case 'X':
-				flags |= _kc_LARGE;
-			case 'x':
-				base = 16;
-				break;
-
-			case 'd':
-			case 'i':
-				flags |= _kc_SIGN;
-			case 'u':
-				break;
-
-			default:
-				if (str <= end)
-					*str = '%';
-				++str;
-				if (*fmt) {
-					if (str <= end)
-						*str = *fmt;
-					++str;
-				} else {
-					--fmt;
-				}
-				continue;
-		}
-		if (qualifier == 'L')
-			num = va_arg(args, long long);
-		else if (qualifier == 'l') {
-			num = va_arg(args, unsigned long);
-			if (flags & _kc_SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z') {
-			num = va_arg(args, size_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) va_arg(args, int);
-			if (flags & _kc_SIGN)
-				num = (signed short) num;
-		} else {
-			num = va_arg(args, unsigned int);
-			if (flags & _kc_SIGN)
-				num = (signed int) num;
-		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
-	}
-	if (str <= end)
-		*str = '\0';
-	else if (size > 0)
-		/* don't write out a null byte if the buf size is zero */
-		*end = '\0';
-	/* the trailing null byte doesn't count towards the total
-	* ++str;
-	*/
-	return str-buf;
-}
-
-int _kc_snprintf(char * buf, size_t size, const char *fmt, ...)
-{
-	va_list args;
-	int i;
-
-	va_start(args, fmt);
-	i = _kc_vsnprintf(buf,size,fmt,args);
-	va_end(args);
-	return i;
-}
-#endif /* < 2.4.8 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
-
-/**************************************/
-/* PCI DMA MAPPING */
-
-#if defined(CONFIG_HIGHMEM)
-
-#ifndef PCI_DRAM_OFFSET
-#define PCI_DRAM_OFFSET 0
-#endif
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
-                 size_t size, int direction)
-{
-	return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
-		PCI_DRAM_OFFSET);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-u64
-_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
-                 size_t size, int direction)
-{
-	return pci_map_single(dev, (void *)page_address(page) + offset, size,
-			      direction);
-}
-
-#endif /* CONFIG_HIGHMEM */
-
-void
-_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
-                   int direction)
-{
-	return pci_unmap_single(dev, dma_addr, size, direction);
-}
-
-#endif /* 2.4.13 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
-
-/**************************************/
-/* PCI DRIVER API */
-
-int
-_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
-{
-	if (!pci_dma_supported(dev, mask))
-		return -EIO;
-	dev->dma_mask = mask;
-	return 0;
-}
-
-int
-_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
-{
-	int i;
-
-	for (i = 0; i < 6; i++) {
-		if (pci_resource_len(dev, i) == 0)
-			continue;
-
-		if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
-			if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
-				pci_release_regions(dev);
-				return -EBUSY;
-			}
-		} else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
-			if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
-				pci_release_regions(dev);
-				return -EBUSY;
-			}
-		}
-	}
-	return 0;
-}
-
-void
-_kc_pci_release_regions(struct pci_dev *dev)
-{
-	int i;
-
-	for (i = 0; i < 6; i++) {
-		if (pci_resource_len(dev, i) == 0)
-			continue;
-
-		if (pci_resource_flags(dev, i) & IORESOURCE_IO)
-			release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-
-		else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
-			release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
-	}
-}
-
-/**************************************/
-/* NETWORK DRIVER API */
-
-struct net_device *
-_kc_alloc_etherdev(int sizeof_priv)
-{
-	struct net_device *dev;
-	int alloc_size;
-
-	alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
-	dev = kzalloc(alloc_size, GFP_KERNEL);
-	if (!dev)
-		return NULL;
-
-	if (sizeof_priv)
-		dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
-	dev->name[0] = '\0';
-	ether_setup(dev);
-
-	return dev;
-}
-
-int
-_kc_is_valid_ether_addr(u8 *addr)
-{
-	const char zaddr[6] = { 0, };
-
-	return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
-}
-
-#endif /* 2.4.3 => 2.4.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
-
-int
-_kc_pci_set_power_state(struct pci_dev *dev, int state)
-{
-	return 0;
-}
-
-int
-_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
-{
-	return 0;
-}
-
-#endif /* 2.4.6 => 2.4.3 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
-void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
-                            int off, int size)
-{
-	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-	frag->page = page;
-	frag->page_offset = off;
-	frag->size = size;
-	skb_shinfo(skb)->nr_frags = i + 1;
-}
-
-/*
- * Original Copyright:
- * find_next_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-                            unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + ffs(tmp);
-}
-
-size_t _kc_strlcpy(char *dest, const char *src, size_t size)
-{
-	size_t ret = strlen(src);
-
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		memcpy(dest, src, len);
-		dest[len] = '\0';
-	}
-	return ret;
-}
-
-#ifndef do_div
-#if BITS_PER_LONG == 32
-uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base)
-{
-	uint64_t rem = *n;
-	uint64_t b = base;
-	uint64_t res, d = 1;
-	uint32_t high = rem >> 32;
-
-	/* Reduce the thing a bit first */
-	res = 0;
-	if (high >= base) {
-		high /= base;
-		res = (uint64_t) high << 32;
-		rem -= (uint64_t) (high*base) << 32;
-	}
-
-	while ((int64_t)b > 0 && b < rem) {
-		b = b+b;
-		d = d+d;
-	}
-
-	do {
-		if (rem >= b) {
-			rem -= b;
-			res += d;
-		}
-		b >>= 1;
-		d >>= 1;
-	} while (d);
-
-	*n = res;
-	return rem;
-}
-#endif /* BITS_PER_LONG == 32 */
-#endif /* do_div */
-#endif /* 2.6.0 => 2.4.6 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) )
-int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...)
-{
-	va_list args;
-	int i;
-
-	va_start(args, fmt);
-	i = vsnprintf(buf, size, fmt, args);
-	va_end(args);
-	return (i >= size) ? (size - 1) : i;
-}
-#endif /* < 2.6.4 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) )
-DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1};
-#endif /* < 2.6.10 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) )
-char *_kc_kstrdup(const char *s, unsigned int gfp)
-{
-	size_t len;
-	char *buf;
-
-	if (!s)
-		return NULL;
-
-	len = strlen(s) + 1;
-	buf = kmalloc(len, gfp);
-	if (buf)
-		memcpy(buf, s, len);
-	return buf;
-}
-#endif /* < 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
-void *_kc_kzalloc(size_t size, int flags)
-{
-	void *ret = kmalloc(size, flags);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
-}
-#endif /* <= 2.6.13 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
-int _kc_skb_pad(struct sk_buff *skb, int pad)
-{
-	int ntail;
-
-        /* If the skbuff is non linear tailroom is always zero.. */
-        if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
-		memset(skb->data+skb->len, 0, pad);
-		return 0;
-        }
-
-	ntail = skb->data_len + pad - (skb->end - skb->tail);
-	if (likely(skb_cloned(skb) || ntail > 0)) {
-		if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC));
-			goto free_skb;
-	}
-
-#ifdef MAX_SKB_FRAGS
-	if (skb_is_nonlinear(skb) &&
-	    !__pskb_pull_tail(skb, skb->data_len))
-		goto free_skb;
-
-#endif
-	memset(skb->data + skb->len, 0, pad);
-        return 0;
-
-free_skb:
-	kfree_skb(skb);
-	return -ENOMEM;
-}
-
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))
-int _kc_pci_save_state(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int size = PCI_CONFIG_SPACE_LEN, i;
-	u16 pcie_cap_offset, pcie_link_status;
-
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) )
-	/* no ->dev for 2.4 kernels */
-	WARN_ON(pdev->dev.driver_data == NULL);
-#endif
-	pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-	if (pcie_cap_offset) {
-		if (!pci_read_config_word(pdev,
-		                          pcie_cap_offset + PCIE_LINK_STATUS,
-		                          &pcie_link_status))
-		size = PCIE_CONFIG_SPACE_LEN;
-	}
-	pci_config_space_ich8lan();
-#ifdef HAVE_PCI_ERS
-	if (adapter->config_space == NULL)
-#else
-	WARN_ON(adapter->config_space != NULL);
-#endif
-		adapter->config_space = kmalloc(size, GFP_KERNEL);
-	if (!adapter->config_space) {
-		printk(KERN_ERR "Out of memory in pci_save_state\n");
-		return -ENOMEM;
-	}
-	for (i = 0; i < (size / 4); i++)
-		pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
-	return 0;
-}
-
-void _kc_pci_restore_state(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int size = PCI_CONFIG_SPACE_LEN, i;
-	u16 pcie_cap_offset;
-	u16 pcie_link_status;
-
-	if (adapter->config_space != NULL) {
-		pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
-		if (pcie_cap_offset &&
-		    !pci_read_config_word(pdev,
-		                          pcie_cap_offset + PCIE_LINK_STATUS,
-		                          &pcie_link_status))
-			size = PCIE_CONFIG_SPACE_LEN;
-
-		pci_config_space_ich8lan();
-		for (i = 0; i < (size / 4); i++)
-		pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
-#ifndef HAVE_PCI_ERS
-		kfree(adapter->config_space);
-		adapter->config_space = NULL;
-#endif
-	}
-}
-#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */
-
-#ifdef HAVE_PCI_ERS
-void _kc_free_netdev(struct net_device *netdev)
-{
-	struct adapter_struct *adapter = netdev_priv(netdev);
-
-	if (adapter->config_space != NULL)
-		kfree(adapter->config_space);
-#ifdef CONFIG_SYSFS
-	if (netdev->reg_state == NETREG_UNINITIALIZED) {
-		kfree((char *)netdev - netdev->padded);
-	} else {
-		BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
-		netdev->reg_state = NETREG_RELEASED;
-		class_device_put(&netdev->class_dev);
-	}
-#else
-	kfree((char *)netdev - netdev->padded);
-#endif
-}
-#endif
-
-void *_kc_kmemdup(const void *src, size_t len, unsigned gfp)
-{
-	void *p;
-
-	p = kzalloc(len, gfp);
-	if (p)
-		memcpy(p, src, len);
-	return p;
-}
-#endif /* <= 2.6.19 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) )
-struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev)
-{
-	return ((struct adapter_struct *)netdev_priv(netdev))->pdev;
-}
-#endif /* < 2.6.21 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) )
-/* hexdump code taken from lib/hexdump.c */
-static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
-			int groupsize, unsigned char *linebuf,
-			size_t linebuflen, bool ascii)
-{
-	const u8 *ptr = buf;
-	u8 ch;
-	int j, lx = 0;
-	int ascii_column;
-
-	if (rowsize != 16 && rowsize != 32)
-		rowsize = 16;
-
-	if (!len)
-		goto nil;
-	if (len > rowsize)		/* limit to one line at a time */
-		len = rowsize;
-	if ((len % groupsize) != 0)	/* no mixed size output */
-		groupsize = 1;
-
-	switch (groupsize) {
-	case 8: {
-		const u64 *ptr8 = buf;
-		int ngroups = len / groupsize;
-
-		for (j = 0; j < ngroups; j++)
-			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-				"%s%16.16llx", j ? " " : "",
-				(unsigned long long)*(ptr8 + j));
-		ascii_column = 17 * ngroups + 2;
-		break;
-	}
-
-	case 4: {
-		const u32 *ptr4 = buf;
-		int ngroups = len / groupsize;
-
-		for (j = 0; j < ngroups; j++)
-			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-				"%s%8.8x", j ? " " : "", *(ptr4 + j));
-		ascii_column = 9 * ngroups + 2;
-		break;
-	}
-
-	case 2: {
-		const u16 *ptr2 = buf;
-		int ngroups = len / groupsize;
-
-		for (j = 0; j < ngroups; j++)
-			lx += scnprintf((char *)(linebuf + lx), linebuflen - lx,
-				"%s%4.4x", j ? " " : "", *(ptr2 + j));
-		ascii_column = 5 * ngroups + 2;
-		break;
-	}
-
-	default:
-		for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
-			ch = ptr[j];
-			linebuf[lx++] = hex_asc(ch >> 4);
-			linebuf[lx++] = hex_asc(ch & 0x0f);
-			linebuf[lx++] = ' ';
-		}
-		if (j)
-			lx--;
-
-		ascii_column = 3 * rowsize + 2;
-		break;
-	}
-	if (!ascii)
-		goto nil;
-
-	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
-		linebuf[lx++] = ' ';
-	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
-		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
-				: '.';
-nil:
-	linebuf[lx++] = '\0';
-}
-
-void _kc_print_hex_dump(const char *level,
-			const char *prefix_str, int prefix_type,
-			int rowsize, int groupsize,
-			const void *buf, size_t len, bool ascii)
-{
-	const u8 *ptr = buf;
-	int i, linelen, remaining = len;
-	unsigned char linebuf[200];
-
-	if (rowsize != 16 && rowsize != 32)
-		rowsize = 16;
-
-	for (i = 0; i < len; i += rowsize) {
-		linelen = min(remaining, rowsize);
-		remaining -= rowsize;
-		_kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
-				linebuf, sizeof(linebuf), ascii);
-
-		switch (prefix_type) {
-		case DUMP_PREFIX_ADDRESS:
-			printk("%s%s%*p: %s\n", level, prefix_str,
-				(int)(2 * sizeof(void *)), ptr + i, linebuf);
-			break;
-		case DUMP_PREFIX_OFFSET:
-			printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
-			break;
-		default:
-			printk("%s%s%s\n", level, prefix_str, linebuf);
-			break;
-		}
-	}
-}
-
-#ifdef HAVE_I2C_SUPPORT
-struct i2c_client *
-_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
-{
-	struct i2c_client	*client;
-	int			status;
-
-	client = kzalloc(sizeof *client, GFP_KERNEL);
-	if (!client)
-		return NULL;
-
-	client->adapter = adap;
-
-	client->dev.platform_data = info->platform_data;
-
-	client->flags = info->flags;
-	client->addr = info->addr;
-
-	strlcpy(client->name, info->type, sizeof(client->name));
-
-	/* Check for address business */
-	status = i2c_check_addr(adap, client->addr);
-	if (status)
-		goto out_err;
-
-	client->dev.parent = &client->adapter->dev;
-	client->dev.bus = &i2c_bus_type;
-
-	status = i2c_attach_client(client);
-	if (status)
-		goto out_err;
-
-	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
-		client->name, dev_name(&client->dev));
-
-	return client;
-
-out_err:
-	dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x "
-		"(%d)\n", client->name, client->addr, status);
-	kfree(client);
-	return NULL;
-}
-#endif /* HAVE_I2C_SUPPORT */
-#endif /* < 2.6.22 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
-#ifdef NAPI
-struct net_device *napi_to_poll_dev(const struct napi_struct *napi)
-{
-	struct adapter_q_vector *q_vector = container_of(napi,
-	                                                struct adapter_q_vector,
-	                                                napi);
-	return &q_vector->poll_dev;
-}
-
-int __kc_adapter_clean(struct net_device *netdev, int *budget)
-{
-	int work_done;
-	int work_to_do = min(*budget, netdev->quota);
-	/* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
-	struct napi_struct *napi = netdev->priv;
-	work_done = napi->poll(napi, work_to_do);
-	*budget -= work_done;
-	netdev->quota -= work_done;
-	return (work_done >= work_to_do) ? 1 : 0;
-}
-#endif /* NAPI */
-#endif /* <= 2.6.24 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) )
-void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
-{
-	struct pci_dev *parent = pdev->bus->self;
-	u16 link_state;
-	int pos;
-
-	if (!parent)
-		return;
-
-	pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
-	if (pos) {
-		pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
-		link_state &= ~state;
-		pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
-	}
-}
-#endif /* < 2.6.26 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
-#ifdef HAVE_TX_MQ
-void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
-{
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int i;
-
-	netif_stop_queue(netdev);
-	if (netif_is_multiqueue(netdev))
-		for (i = 0; i < adapter->num_tx_queues; i++)
-			netif_stop_subqueue(netdev, i);
-}
-void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
-{
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int i;
-
-	netif_wake_queue(netdev);
-	if (netif_is_multiqueue(netdev))
-		for (i = 0; i < adapter->num_tx_queues; i++)
-			netif_wake_subqueue(netdev, i);
-}
-void _kc_netif_tx_start_all_queues(struct net_device *netdev)
-{
-	struct adapter_struct *adapter = netdev_priv(netdev);
-	int i;
-
-	netif_start_queue(netdev);
-	if (netif_is_multiqueue(netdev))
-		for (i = 0; i < adapter->num_tx_queues; i++)
-			netif_start_subqueue(netdev, i);
-}
-#endif /* HAVE_TX_MQ */
-
-#ifndef __WARN_printf
-void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...)
-{
-	va_list args;
-
-	printk(KERN_WARNING "------------[ cut here ]------------\n");
-	printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line);
-	va_start(args, fmt);
-	vprintk(fmt, args);
-	va_end(args);
-
-	dump_stack();
-}
-#endif /* __WARN_printf */
-#endif /* < 2.6.27 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
-
-int
-_kc_pci_prepare_to_sleep(struct pci_dev *dev)
-{
-	pci_power_t target_state;
-	int error;
-
-	target_state = pci_choose_state(dev, PMSG_SUSPEND);
-
-	pci_enable_wake(dev, target_state, true);
-
-	error = pci_set_power_state(dev, target_state);
-
-	if (error)
-		pci_enable_wake(dev, target_state, false);
-
-	return error;
-}
-
-int
-_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
-{
-	int err;
-
-	err = pci_enable_wake(dev, PCI_D3cold, enable);
-	if (err)
-		goto out;
-
-	err = pci_enable_wake(dev, PCI_D3hot, enable);
-
-out:
-	return err;
-}
-#endif /* < 2.6.28 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
-static void __kc_pci_set_master(struct pci_dev *pdev, bool enable)
-{
-	u16 old_cmd, cmd;
-
-	pci_read_config_word(pdev, PCI_COMMAND, &old_cmd);
-	if (enable)
-		cmd = old_cmd | PCI_COMMAND_MASTER;
-	else
-		cmd = old_cmd & ~PCI_COMMAND_MASTER;
-	if (cmd != old_cmd) {
-		dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n",
-			enable ? "enabling" : "disabling");
-		pci_write_config_word(pdev, PCI_COMMAND, cmd);
-	}
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) )
-	pdev->is_busmaster = enable;
-#endif
-}
-
-void _kc_pci_clear_master(struct pci_dev *dev)
-{
-	__kc_pci_set_master(dev, false);
-}
-#endif /* < 2.6.29 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) )
-#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))
-int _kc_pci_num_vf(struct pci_dev *dev)
-{
-	int num_vf = 0;
-#ifdef CONFIG_PCI_IOV
-	struct pci_dev *vfdev;
-
-	/* loop through all ethernet devices starting at PF dev */
-	vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL);
-	while (vfdev) {
-		if (vfdev->is_virtfn && vfdev->physfn == dev)
-			num_vf++;
-
-		vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev);
-	}
-
-#endif
-	return num_vf;
-}
-#endif /* RHEL_RELEASE_CODE */
-#endif /* < 2.6.34 */
-
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) )
-#ifdef HAVE_TX_MQ
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)))
-#ifndef CONFIG_NETDEVICES_MULTIQUEUE
-void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
-{
-	unsigned int real_num = dev->real_num_tx_queues;
-	struct Qdisc *qdisc;
-	int i;
-
-	if (unlikely(txq > dev->num_tx_queues))
-		;
-	else if (txq > real_num)
-		dev->real_num_tx_queues = txq;
-	else if ( txq < real_num) {
-		dev->real_num_tx_queues = txq;
-		for (i = txq; i < dev->num_tx_queues; i++) {
-			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
-			if (qdisc) {
-				spin_lock_bh(qdisc_lock(qdisc));
-				qdisc_reset(qdisc);
-				spin_unlock_bh(qdisc_lock(qdisc));
-			}
-		}
-	}
-}
-#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
-#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */
-#endif /* HAVE_TX_MQ */
-
-ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
-				   const void __user *from, size_t count)
-{
-        loff_t pos = *ppos;
-        size_t res;
-
-        if (pos < 0)
-                return -EINVAL;
-        if (pos >= available || !count)
-                return 0;
-        if (count > available - pos)
-                count = available - pos;
-        res = copy_from_user(to + pos, from, count);
-        if (res == count)
-                return -EFAULT;
-        count -= res;
-        *ppos = pos + count;
-        return count;
-}
-
-#endif /* < 2.6.35 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
-static const u32 _kc_flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
-
-u32 _kc_ethtool_op_get_flags(struct net_device *dev)
-{
-	return dev->features & _kc_flags_dup_features;
-}
-
-int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	if (data & ~supported)
-		return -EINVAL;
-
-	dev->features = ((dev->features & ~_kc_flags_dup_features) |
-			 (data & _kc_flags_dup_features));
-	return 0;
-}
-#endif /* < 2.6.36 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) )
-#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)))
-
-
-
-#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */
-#endif /* < 2.6.39 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) )
-void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
-			 int off, int size, unsigned int truesize)
-{
-	skb_fill_page_desc(skb, i, page, off, size);
-	skb->len += size;
-	skb->data_len += size;
-	skb->truesize += truesize;
-}
-
-int _kc_simple_open(struct inode *inode, struct file *file)
-{
-        if (inode->i_private)
-                file->private_data = inode->i_private;
-
-        return 0;
-}
-
-#endif /* < 3.4.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) )
-#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
-    !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5))
-static inline int __kc_pcie_cap_version(struct pci_dev *dev)
-{
-	int pos;
-	u16 reg16;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &reg16);
-	return reg16 & PCI_EXP_FLAGS_VERS;
-}
-
-static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev)
-{
-	return true;
-}
-
-static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       type == PCI_EXP_TYPE_ENDPOINT ||
-	       type == PCI_EXP_TYPE_LEG_END;
-}
-
-static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-	int pos;
-	u16 pcie_flags_reg;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (!pos)
-		return 0;
-	pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       (type == PCI_EXP_TYPE_DOWNSTREAM &&
-		pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
-}
-
-static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev)
-{
-	int type = pci_pcie_type(dev);
-
-	return __kc_pcie_cap_version(dev) > 1 ||
-	       type == PCI_EXP_TYPE_ROOT_PORT ||
-	       type == PCI_EXP_TYPE_RC_EC;
-}
-
-static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
-{
-	if (!pci_is_pcie(dev))
-		return false;
-
-	switch (pos) {
-	case PCI_EXP_FLAGS_TYPE:
-		return true;
-	case PCI_EXP_DEVCAP:
-	case PCI_EXP_DEVCTL:
-	case PCI_EXP_DEVSTA:
-		return __kc_pcie_cap_has_devctl(dev);
-	case PCI_EXP_LNKCAP:
-	case PCI_EXP_LNKCTL:
-	case PCI_EXP_LNKSTA:
-		return __kc_pcie_cap_has_lnkctl(dev);
-	case PCI_EXP_SLTCAP:
-	case PCI_EXP_SLTCTL:
-	case PCI_EXP_SLTSTA:
-		return __kc_pcie_cap_has_sltctl(dev);
-	case PCI_EXP_RTCTL:
-	case PCI_EXP_RTCAP:
-	case PCI_EXP_RTSTA:
-		return __kc_pcie_cap_has_rtctl(dev);
-	case PCI_EXP_DEVCAP2:
-	case PCI_EXP_DEVCTL2:
-	case PCI_EXP_LNKCAP2:
-	case PCI_EXP_LNKCTL2:
-	case PCI_EXP_LNKSTA2:
-		return __kc_pcie_cap_version(dev) > 1;
-	default:
-		return false;
-	}
-}
-
-/*
- * Note that these accessor functions are only for the "PCI Express
- * Capability" (see PCIe spec r3.0, sec 7.8).  They do not apply to the
- * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.)
- */
-int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val)
-{
-	int ret;
-
-	*val = 0;
-	if (pos & 1)
-		return -EINVAL;
-
-	if (__kc_pcie_capability_reg_implemented(dev, pos)) {
-		ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val);
-		/*
-		 * Reset *val to 0 if pci_read_config_word() fails, it may
-		 * have been written as 0xFFFF if hardware error happens
-		 * during pci_read_config_word().
-		 */
-		if (ret)
-			*val = 0;
-		return ret;
-	}
-
-	/*
-	 * For Functions that do not implement the Slot Capabilities,
-	 * Slot Status, and Slot Control registers, these spaces must
-	 * be hardwired to 0b, with the exception of the Presence Detect
-	 * State bit in the Slot Status register of Downstream Ports,
-	 * which must be hardwired to 1b.  (PCIe Base Spec 3.0, sec 7.8)
-	 */
-	if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA &&
-	    pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) {
-		*val = PCI_EXP_SLTSTA_PDS;
-	}
-
-	return 0;
-}
-
-int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val)
-{
-	if (pos & 1)
-		return -EINVAL;
-
-	if (!__kc_pcie_capability_reg_implemented(dev, pos))
-		return 0;
-
-	return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val);
-}
-
-int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos,
-					    u16 clear, u16 set)
-{
-	int ret;
-	u16 val;
-
-	ret = __kc_pcie_capability_read_word(dev, pos, &val);
-	if (!ret) {
-		val &= ~clear;
-		val |= set;
-		ret = __kc_pcie_capability_write_word(dev, pos, val);
-	}
-
-	return ret;
-}
-#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \
-          !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */
-#endif /* < 3.7.0 */
-
-/******************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) )
-#endif /* 3.9.0 */
-
-/*****************************************************************************/
-#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) )
-#ifdef CONFIG_PCI_IOV
-int __kc_pci_vfs_assigned(struct pci_dev *dev)
-{
-	unsigned int vfs_assigned = 0;
-#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
-	int pos;
-	struct pci_dev *vfdev;
-	unsigned short dev_id;
-
-	/* only search if we are a PF */
-	if (!dev->is_physfn)
-		return 0;
-
-	/* find SR-IOV capability */
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
-	if (!pos)
-		return 0;
-
-	/*
-	 * determine the device ID for the VFs, the vendor ID will be the
-	 * same as the PF so there is no need to check for that one
-	 */
-	pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id);
-
-	/* loop through all the VFs to see if we own any that are assigned */
-	vfdev = pci_get_device(dev->vendor, dev_id, NULL);
-	while (vfdev) {
-		/*
-		 * It is considered assigned if it is a virtual function with
-		 * our dev as the physical function and the assigned bit is set
-		 */
-		if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
-		    (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
-			vfs_assigned++;
-
-		vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
-	}
-
-#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
-	return vfs_assigned;
-}
-
-#endif /* CONFIG_PCI_IOV */
-#endif /* 3.10.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index de3b8dc9..84826b26 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -3891,7 +3891,7 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \
     || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
 #define HAVE_NDO_DFLT_BRIDGE_ADD_MASK
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_FDB_ADD_VID
 #endif /* !RHEL 7.2 */
 #endif /* >= 3.19.0 */
@@ -3901,12 +3901,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 /* vlan_tx_xx functions got renamed to skb_vlan */
 #define vlan_tx_tag_get skb_vlan_tag_get
 #define vlan_tx_tag_present skb_vlan_tag_present
-#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) ))
+#if ( RHEL_RELEASE_CODE != RHEL_RELEASE_VERSION(7,2) )
 #define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
 #endif /* !RHEL 7.2 */
 #endif /* 4.0.0 */
 
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) )
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,3) ))
 /* ndo_bridge_getlink adds new nlflags parameter */
 #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
 #endif /* >= 4.1.0 */
@@ -3916,6 +3917,18 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
 #endif /* >= 4.2.0 */
 
+/*
+ * The vlan_tx_tag_* macros were renamed to skb_vlan_tag_* in Linux commit
+ * df8a39defad4. Older kernels that backported this commit must use the renamed
+ * macros. This fix is specific to Red Hat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+	(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8)) && \
+	(LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
 #if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) )
 #define HAVE_VF_VLAN_PROTO
 #endif /* >= 4.9.0 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
deleted file mode 100644
index e1a89388..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c
+++ /dev/null
@@ -1,1171 +0,0 @@
-/*******************************************************************************
-
-  Intel(R) Gigabit Ethernet Linux driver
-  Copyright(c) 2007-2013 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-/*
- * net/core/ethtool.c - Ethtool ioctl handler
- * Copyright (c) 2003 Matthew Wilcox <matthew@wil.cx>
- *
- * This file is where we call all the ethtool_ops commands to get
- * the information ethtool needs.  We fall back to calling do_ioctl()
- * for drivers which haven't been converted to ethtool_ops yet.
- *
- * It's GPL, stupid.
- *
- * Modification by sfeldma@pobox.com to work as backward compat
- * solution for pre-ethtool_ops kernels.
- *	- copied struct ethtool_ops from ethtool.h
- *	- defined SET_ETHTOOL_OPS
- *	- put in some #ifndef NETIF_F_xxx wrappers
- *	- changes refs to dev->ethtool_ops to ethtool_ops
- *	- changed dev_ethtool to ethtool_ioctl
- *      - remove EXPORT_SYMBOL()s
- *      - added _kc_ prefix in built-in ethtool_op_xxx ops.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/mii.h>
-#include <linux/ethtool.h>
-#include <linux/netdevice.h>
-#include <asm/uaccess.h>
-
-#include "kcompat.h"
-
-#undef SUPPORTED_10000baseT_Full
-#define SUPPORTED_10000baseT_Full	(1 << 12)
-#undef ADVERTISED_10000baseT_Full
-#define ADVERTISED_10000baseT_Full	(1 << 12)
-#undef SPEED_10000
-#define SPEED_10000		10000
-
-#undef ethtool_ops
-#define ethtool_ops _kc_ethtool_ops
-
-struct _kc_ethtool_ops {
-	int  (*get_settings)(struct net_device *, struct ethtool_cmd *);
-	int  (*set_settings)(struct net_device *, struct ethtool_cmd *);
-	void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
-	int  (*get_regs_len)(struct net_device *);
-	void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
-	void (*get_wol)(struct net_device *, struct ethtool_wolinfo *);
-	int  (*set_wol)(struct net_device *, struct ethtool_wolinfo *);
-	u32  (*get_msglevel)(struct net_device *);
-	void (*set_msglevel)(struct net_device *, u32);
-	int  (*nway_reset)(struct net_device *);
-	u32  (*get_link)(struct net_device *);
-	int  (*get_eeprom_len)(struct net_device *);
-	int  (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-	int  (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-	int  (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
-	int  (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
-	void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	int  (*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	void (*get_pauseparam)(struct net_device *,
-	                       struct ethtool_pauseparam*);
-	int  (*set_pauseparam)(struct net_device *,
-	                       struct ethtool_pauseparam*);
-	u32  (*get_rx_csum)(struct net_device *);
-	int  (*set_rx_csum)(struct net_device *, u32);
-	u32  (*get_tx_csum)(struct net_device *);
-	int  (*set_tx_csum)(struct net_device *, u32);
-	u32  (*get_sg)(struct net_device *);
-	int  (*set_sg)(struct net_device *, u32);
-	u32  (*get_tso)(struct net_device *);
-	int  (*set_tso)(struct net_device *, u32);
-	int  (*self_test_count)(struct net_device *);
-	void (*self_test)(struct net_device *, struct ethtool_test *, u64 *);
-	void (*get_strings)(struct net_device *, u32 stringset, u8 *);
-	int  (*phys_id)(struct net_device *, u32);
-	int  (*get_stats_count)(struct net_device *);
-	void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *,
-	                          u64 *);
-} *ethtool_ops = NULL;
-
-#undef SET_ETHTOOL_OPS
-#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops))
-
-/*
- * Some useful ethtool_ops methods that are device independent. If we find that
- * all drivers want to do the same thing here, we can turn these into dev_()
- * function calls.
- */
-
-#undef ethtool_op_get_link
-#define ethtool_op_get_link _kc_ethtool_op_get_link
-u32 _kc_ethtool_op_get_link(struct net_device *dev)
-{
-	return netif_carrier_ok(dev) ? 1 : 0;
-}
-
-#undef ethtool_op_get_tx_csum
-#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum
-u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev)
-{
-#ifdef NETIF_F_IP_CSUM
-	return (dev->features & NETIF_F_IP_CSUM) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_tx_csum
-#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum
-int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_IP_CSUM
-	if (data)
-#ifdef NETIF_F_IPV6_CSUM
-		dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-	else
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-#else
-		dev->features |= NETIF_F_IP_CSUM;
-	else
-		dev->features &= ~NETIF_F_IP_CSUM;
-#endif
-#endif
-
-	return 0;
-}
-
-#undef ethtool_op_get_sg
-#define ethtool_op_get_sg _kc_ethtool_op_get_sg
-u32 _kc_ethtool_op_get_sg(struct net_device *dev)
-{
-#ifdef NETIF_F_SG
-	return (dev->features & NETIF_F_SG) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_sg
-#define ethtool_op_set_sg _kc_ethtool_op_set_sg
-int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_SG
-	if (data)
-		dev->features |= NETIF_F_SG;
-	else
-		dev->features &= ~NETIF_F_SG;
-#endif
-
-	return 0;
-}
-
-#undef ethtool_op_get_tso
-#define ethtool_op_get_tso _kc_ethtool_op_get_tso
-u32 _kc_ethtool_op_get_tso(struct net_device *dev)
-{
-#ifdef NETIF_F_TSO
-	return (dev->features & NETIF_F_TSO) != 0;
-#else
-	return 0;
-#endif
-}
-
-#undef ethtool_op_set_tso
-#define ethtool_op_set_tso _kc_ethtool_op_set_tso
-int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-#ifdef NETIF_F_TSO
-	if (data)
-		dev->features |= NETIF_F_TSO;
-	else
-		dev->features &= ~NETIF_F_TSO;
-#endif
-
-	return 0;
-}
-
-/* Handlers for each ethtool command */
-
-static int ethtool_get_settings(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_cmd cmd = { ETHTOOL_GSET };
-	int err;
-
-	if (!ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-
-	err = ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return err;
-
-	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_settings(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_cmd cmd;
-
-	if (!ethtool_ops->set_settings)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
-		return -EFAULT;
-
-	return ethtool_ops->set_settings(dev, &cmd);
-}
-
-static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_drvinfo info;
-	struct ethtool_ops *ops = ethtool_ops;
-
-	if (!ops->get_drvinfo)
-		return -EOPNOTSUPP;
-
-	memset(&info, 0, sizeof(info));
-	info.cmd = ETHTOOL_GDRVINFO;
-	ops->get_drvinfo(dev, &info);
-
-	if (ops->self_test_count)
-		info.testinfo_len = ops->self_test_count(dev);
-	if (ops->get_stats_count)
-		info.n_stats = ops->get_stats_count(dev);
-	if (ops->get_regs_len)
-		info.regdump_len = ops->get_regs_len(dev);
-	if (ops->get_eeprom_len)
-		info.eedump_len = ops->get_eeprom_len(dev);
-
-	if (copy_to_user(useraddr, &info, sizeof(info)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_get_regs(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_regs regs;
-	struct ethtool_ops *ops = ethtool_ops;
-	void *regbuf;
-	int reglen, ret;
-
-	if (!ops->get_regs || !ops->get_regs_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&regs, useraddr, sizeof(regs)))
-		return -EFAULT;
-
-	reglen = ops->get_regs_len(dev);
-	if (regs.len > reglen)
-		regs.len = reglen;
-
-	regbuf = kmalloc(reglen, GFP_USER);
-	if (!regbuf)
-		return -ENOMEM;
-
-	ops->get_regs(dev, &regs, regbuf);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &regs, sizeof(regs)))
-		goto out;
-	useraddr += offsetof(struct ethtool_regs, data);
-	if (copy_to_user(useraddr, regbuf, reglen))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(regbuf);
-	return ret;
-}
-
-static int ethtool_get_wol(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
-
-	if (!ethtool_ops->get_wol)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_wol(dev, &wol);
-
-	if (copy_to_user(useraddr, &wol, sizeof(wol)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_wol(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_wolinfo wol;
-
-	if (!ethtool_ops->set_wol)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&wol, useraddr, sizeof(wol)))
-		return -EFAULT;
-
-	return ethtool_ops->set_wol(dev, &wol);
-}
-
-static int ethtool_get_msglevel(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
-	if (!ethtool_ops->get_msglevel)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_msglevel(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_msglevel)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	ethtool_ops->set_msglevel(dev, edata.data);
-	return 0;
-}
-
-static int ethtool_nway_reset(struct net_device *dev)
-{
-	if (!ethtool_ops->nway_reset)
-		return -EOPNOTSUPP;
-
-	return ethtool_ops->nway_reset(dev);
-}
-
-static int ethtool_get_link(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GLINK };
-
-	if (!ethtool_ops->get_link)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_link(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_get_eeprom(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_eeprom eeprom;
-	struct ethtool_ops *ops = ethtool_ops;
-	u8 *data;
-	int ret;
-
-	if (!ops->get_eeprom || !ops->get_eeprom_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-		return -EFAULT;
-
-	/* Check for wrap and zero */
-	if (eeprom.offset + eeprom.len <= eeprom.offset)
-		return -EINVAL;
-
-	/* Check for exceeding total eeprom len */
-	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-		return -EINVAL;
-
-	data = kmalloc(eeprom.len, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ret = -EFAULT;
-	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-		goto out;
-
-	ret = ops->get_eeprom(dev, &eeprom, data);
-	if (ret)
-		goto out;
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &eeprom, sizeof(eeprom)))
-		goto out;
-	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_set_eeprom(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_eeprom eeprom;
-	struct ethtool_ops *ops = ethtool_ops;
-	u8 *data;
-	int ret;
-
-	if (!ops->set_eeprom || !ops->get_eeprom_len)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&eeprom, useraddr, sizeof(eeprom)))
-		return -EFAULT;
-
-	/* Check for wrap and zero */
-	if (eeprom.offset + eeprom.len <= eeprom.offset)
-		return -EINVAL;
-
-	/* Check for exceeding total eeprom len */
-	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
-		return -EINVAL;
-
-	data = kmalloc(eeprom.len, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ret = -EFAULT;
-	if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len))
-		goto out;
-
-	ret = ops->set_eeprom(dev, &eeprom, data);
-	if (ret)
-		goto out;
-
-	if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len))
-		ret = -EFAULT;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_get_coalesce(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE };
-
-	if (!ethtool_ops->get_coalesce)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_coalesce(dev, &coalesce);
-
-	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_coalesce(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_coalesce coalesce;
-
-	if (!ethtool_ops->get_coalesce)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
-		return -EFAULT;
-
-	return ethtool_ops->set_coalesce(dev, &coalesce);
-}
-
-static int ethtool_get_ringparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM };
-
-	if (!ethtool_ops->get_ringparam)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_ringparam(dev, &ringparam);
-
-	if (copy_to_user(useraddr, &ringparam, sizeof(ringparam)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_ringparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_ringparam ringparam;
-
-	if (!ethtool_ops->get_ringparam)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&ringparam, useraddr, sizeof(ringparam)))
-		return -EFAULT;
-
-	return ethtool_ops->set_ringparam(dev, &ringparam);
-}
-
-static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
-
-	if (!ethtool_ops->get_pauseparam)
-		return -EOPNOTSUPP;
-
-	ethtool_ops->get_pauseparam(dev, &pauseparam);
-
-	if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_pauseparam pauseparam;
-
-	if (!ethtool_ops->get_pauseparam)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
-		return -EFAULT;
-
-	return ethtool_ops->set_pauseparam(dev, &pauseparam);
-}
-
-static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
-	if (!ethtool_ops->get_rx_csum)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_rx_csum(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_rx_csum)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	ethtool_ops->set_rx_csum(dev, edata.data);
-	return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
-	if (!ethtool_ops->get_tx_csum)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_tx_csum(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_tx_csum)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	return ethtool_ops->set_tx_csum(dev, edata.data);
-}
-
-static int ethtool_get_sg(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GSG };
-
-	if (!ethtool_ops->get_sg)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_sg(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_sg(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_sg)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	return ethtool_ops->set_sg(dev, edata.data);
-}
-
-static int ethtool_get_tso(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata = { ETHTOOL_GTSO };
-
-	if (!ethtool_ops->get_tso)
-		return -EOPNOTSUPP;
-
-	edata.data = ethtool_ops->get_tso(dev);
-
-	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		return -EFAULT;
-	return 0;
-}
-
-static int ethtool_set_tso(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_value edata;
-
-	if (!ethtool_ops->set_tso)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&edata, useraddr, sizeof(edata)))
-		return -EFAULT;
-
-	return ethtool_ops->set_tso(dev, edata.data);
-}
-
-static int ethtool_self_test(struct net_device *dev, char *useraddr)
-{
-	struct ethtool_test test;
-	struct ethtool_ops *ops = ethtool_ops;
-	u64 *data;
-	int ret;
-
-	if (!ops->self_test || !ops->self_test_count)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&test, useraddr, sizeof(test)))
-		return -EFAULT;
-
-	test.len = ops->self_test_count(dev);
-	data = kmalloc(test.len * sizeof(u64), GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ops->self_test(dev, &test, data);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &test, sizeof(test)))
-		goto out;
-	useraddr += sizeof(test);
-	if (copy_to_user(useraddr, data, test.len * sizeof(u64)))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_get_strings(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_gstrings gstrings;
-	struct ethtool_ops *ops = ethtool_ops;
-	u8 *data;
-	int ret;
-
-	if (!ops->get_strings)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
-		return -EFAULT;
-
-	switch (gstrings.string_set) {
-	case ETH_SS_TEST:
-		if (!ops->self_test_count)
-			return -EOPNOTSUPP;
-		gstrings.len = ops->self_test_count(dev);
-		break;
-	case ETH_SS_STATS:
-		if (!ops->get_stats_count)
-			return -EOPNOTSUPP;
-		gstrings.len = ops->get_stats_count(dev);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ops->get_strings(dev, gstrings.string_set, data);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
-		goto out;
-	useraddr += sizeof(gstrings);
-	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-static int ethtool_phys_id(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_value id;
-
-	if (!ethtool_ops->phys_id)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&id, useraddr, sizeof(id)))
-		return -EFAULT;
-
-	return ethtool_ops->phys_id(dev, id.data);
-}
-
-static int ethtool_get_stats(struct net_device *dev, void *useraddr)
-{
-	struct ethtool_stats stats;
-	struct ethtool_ops *ops = ethtool_ops;
-	u64 *data;
-	int ret;
-
-	if (!ops->get_ethtool_stats || !ops->get_stats_count)
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&stats, useraddr, sizeof(stats)))
-		return -EFAULT;
-
-	stats.n_stats = ops->get_stats_count(dev);
-	data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
-	if (!data)
-		return -ENOMEM;
-
-	ops->get_ethtool_stats(dev, &stats, data);
-
-	ret = -EFAULT;
-	if (copy_to_user(useraddr, &stats, sizeof(stats)))
-		goto out;
-	useraddr += sizeof(stats);
-	if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64)))
-		goto out;
-	ret = 0;
-
-out:
-	kfree(data);
-	return ret;
-}
-
-/* The main entry point in this file.  Called from net/core/dev.c */
-
-#define ETHTOOL_OPS_COMPAT
-int ethtool_ioctl(struct ifreq *ifr)
-{
-	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
-	void *useraddr = (void *) ifr->ifr_data;
-	u32 ethcmd;
-
-	/*
-	 * XXX: This can be pushed down into the ethtool_* handlers that
-	 * need it.  Keep existing behavior for the moment.
-	 */
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!dev || !netif_device_present(dev))
-		return -ENODEV;
-
-	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
-		return -EFAULT;
-
-	switch (ethcmd) {
-	case ETHTOOL_GSET:
-		return ethtool_get_settings(dev, useraddr);
-	case ETHTOOL_SSET:
-		return ethtool_set_settings(dev, useraddr);
-	case ETHTOOL_GDRVINFO:
-		return ethtool_get_drvinfo(dev, useraddr);
-	case ETHTOOL_GREGS:
-		return ethtool_get_regs(dev, useraddr);
-	case ETHTOOL_GWOL:
-		return ethtool_get_wol(dev, useraddr);
-	case ETHTOOL_SWOL:
-		return ethtool_set_wol(dev, useraddr);
-	case ETHTOOL_GMSGLVL:
-		return ethtool_get_msglevel(dev, useraddr);
-	case ETHTOOL_SMSGLVL:
-		return ethtool_set_msglevel(dev, useraddr);
-	case ETHTOOL_NWAY_RST:
-		return ethtool_nway_reset(dev);
-	case ETHTOOL_GLINK:
-		return ethtool_get_link(dev, useraddr);
-	case ETHTOOL_GEEPROM:
-		return ethtool_get_eeprom(dev, useraddr);
-	case ETHTOOL_SEEPROM:
-		return ethtool_set_eeprom(dev, useraddr);
-	case ETHTOOL_GCOALESCE:
-		return ethtool_get_coalesce(dev, useraddr);
-	case ETHTOOL_SCOALESCE:
-		return ethtool_set_coalesce(dev, useraddr);
-	case ETHTOOL_GRINGPARAM:
-		return ethtool_get_ringparam(dev, useraddr);
-	case ETHTOOL_SRINGPARAM:
-		return ethtool_set_ringparam(dev, useraddr);
-	case ETHTOOL_GPAUSEPARAM:
-		return ethtool_get_pauseparam(dev, useraddr);
-	case ETHTOOL_SPAUSEPARAM:
-		return ethtool_set_pauseparam(dev, useraddr);
-	case ETHTOOL_GRXCSUM:
-		return ethtool_get_rx_csum(dev, useraddr);
-	case ETHTOOL_SRXCSUM:
-		return ethtool_set_rx_csum(dev, useraddr);
-	case ETHTOOL_GTXCSUM:
-		return ethtool_get_tx_csum(dev, useraddr);
-	case ETHTOOL_STXCSUM:
-		return ethtool_set_tx_csum(dev, useraddr);
-	case ETHTOOL_GSG:
-		return ethtool_get_sg(dev, useraddr);
-	case ETHTOOL_SSG:
-		return ethtool_set_sg(dev, useraddr);
-	case ETHTOOL_GTSO:
-		return ethtool_get_tso(dev, useraddr);
-	case ETHTOOL_STSO:
-		return ethtool_set_tso(dev, useraddr);
-	case ETHTOOL_TEST:
-		return ethtool_self_test(dev, useraddr);
-	case ETHTOOL_GSTRINGS:
-		return ethtool_get_strings(dev, useraddr);
-	case ETHTOOL_PHYS_ID:
-		return ethtool_phys_id(dev, useraddr);
-	case ETHTOOL_GSTATS:
-		return ethtool_get_stats(dev, useraddr);
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return -EOPNOTSUPP;
-}
-
-#define mii_if_info _kc_mii_if_info
-struct _kc_mii_if_info {
-	int phy_id;
-	int advertising;
-	int phy_id_mask;
-	int reg_num_mask;
-
-	unsigned int full_duplex : 1;	/* is full duplex? */
-	unsigned int force_media : 1;	/* is autoneg. disabled? */
-
-	struct net_device *dev;
-	int (*mdio_read) (struct net_device *dev, int phy_id, int location);
-	void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val);
-};
-
-struct ethtool_cmd;
-struct mii_ioctl_data;
-
-#undef mii_link_ok
-#define mii_link_ok _kc_mii_link_ok
-#undef mii_nway_restart
-#define mii_nway_restart _kc_mii_nway_restart
-#undef mii_ethtool_gset
-#define mii_ethtool_gset _kc_mii_ethtool_gset
-#undef mii_ethtool_sset
-#define mii_ethtool_sset _kc_mii_ethtool_sset
-#undef mii_check_link
-#define mii_check_link _kc_mii_check_link
-extern int _kc_mii_link_ok (struct mii_if_info *mii);
-extern int _kc_mii_nway_restart (struct mii_if_info *mii);
-extern int _kc_mii_ethtool_gset(struct mii_if_info *mii,
-                                struct ethtool_cmd *ecmd);
-extern int _kc_mii_ethtool_sset(struct mii_if_info *mii,
-                                struct ethtool_cmd *ecmd);
-extern void _kc_mii_check_link (struct mii_if_info *mii);
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-#undef generic_mii_ioctl
-#define generic_mii_ioctl _kc_generic_mii_ioctl
-extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
-                                 struct mii_ioctl_data *mii_data, int cmd,
-                                 unsigned int *duplex_changed);
-#endif /* > 2.4.6 */
-
-
-struct _kc_pci_dev_ext {
-	struct pci_dev *dev;
-	void *pci_drvdata;
-	struct pci_driver *driver;
-};
-
-struct _kc_net_dev_ext {
-	struct net_device *dev;
-	unsigned int carrier;
-};
-
-
-/**************************************/
-/* mii support */
-
-int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
-	struct net_device *dev = mii->dev;
-	u32 advert, bmcr, lpa, nego;
-
-	ecmd->supported =
-	    (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
-	     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
-	     SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
-
-	/* only supports twisted-pair */
-	ecmd->port = PORT_MII;
-
-	/* only supports internal transceiver */
-	ecmd->transceiver = XCVR_INTERNAL;
-
-	/* this isn't fully supported at higher layers */
-	ecmd->phy_address = mii->phy_id;
-
-	ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII;
-	advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
-	if (advert & ADVERTISE_10HALF)
-		ecmd->advertising |= ADVERTISED_10baseT_Half;
-	if (advert & ADVERTISE_10FULL)
-		ecmd->advertising |= ADVERTISED_10baseT_Full;
-	if (advert & ADVERTISE_100HALF)
-		ecmd->advertising |= ADVERTISED_100baseT_Half;
-	if (advert & ADVERTISE_100FULL)
-		ecmd->advertising |= ADVERTISED_100baseT_Full;
-
-	bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-	lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA);
-	if (bmcr & BMCR_ANENABLE) {
-		ecmd->advertising |= ADVERTISED_Autoneg;
-		ecmd->autoneg = AUTONEG_ENABLE;
-
-		nego = mii_nway_result(advert & lpa);
-		if (nego == LPA_100FULL || nego == LPA_100HALF)
-			ecmd->speed = SPEED_100;
-		else
-			ecmd->speed = SPEED_10;
-		if (nego == LPA_100FULL || nego == LPA_10FULL) {
-			ecmd->duplex = DUPLEX_FULL;
-			mii->full_duplex = 1;
-		} else {
-			ecmd->duplex = DUPLEX_HALF;
-			mii->full_duplex = 0;
-		}
-	} else {
-		ecmd->autoneg = AUTONEG_DISABLE;
-
-		ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10;
-		ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
-	}
-
-	/* ignore maxtxpkt, maxrxpkt for now */
-
-	return 0;
-}
-
-int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
-{
-	struct net_device *dev = mii->dev;
-
-	if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100)
-		return -EINVAL;
-	if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL)
-		return -EINVAL;
-	if (ecmd->port != PORT_MII)
-		return -EINVAL;
-	if (ecmd->transceiver != XCVR_INTERNAL)
-		return -EINVAL;
-	if (ecmd->phy_address != mii->phy_id)
-		return -EINVAL;
-	if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE)
-		return -EINVAL;
-
-	/* ignore supported, maxtxpkt, maxrxpkt */
-
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
-		u32 bmcr, advert, tmp;
-
-		if ((ecmd->advertising & (ADVERTISED_10baseT_Half |
-					  ADVERTISED_10baseT_Full |
-					  ADVERTISED_100baseT_Half |
-					  ADVERTISED_100baseT_Full)) == 0)
-			return -EINVAL;
-
-		/* advertise only what has been requested */
-		advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
-		tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-		if (ADVERTISED_10baseT_Half)
-			tmp |= ADVERTISE_10HALF;
-		if (ADVERTISED_10baseT_Full)
-			tmp |= ADVERTISE_10FULL;
-		if (ADVERTISED_100baseT_Half)
-			tmp |= ADVERTISE_100HALF;
-		if (ADVERTISED_100baseT_Full)
-			tmp |= ADVERTISE_100FULL;
-		if (advert != tmp) {
-			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
-			mii->advertising = tmp;
-		}
-
-		/* turn on autonegotiation, and force a renegotiate */
-		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-		bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
-		mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
-
-		mii->force_media = 0;
-	} else {
-		u32 bmcr, tmp;
-
-		/* turn off auto negotiation, set speed and duplexity */
-		bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
-		tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX);
-		if (ecmd->speed == SPEED_100)
-			tmp |= BMCR_SPEED100;
-		if (ecmd->duplex == DUPLEX_FULL) {
-			tmp |= BMCR_FULLDPLX;
-			mii->full_duplex = 1;
-		} else
-			mii->full_duplex = 0;
-		if (bmcr != tmp)
-			mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
-
-		mii->force_media = 1;
-	}
-	return 0;
-}
-
-int _kc_mii_link_ok (struct mii_if_info *mii)
-{
-	/* first, a dummy read, needed to latch some MII phys */
-	mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR);
-	if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS)
-		return 1;
-	return 0;
-}
-
-int _kc_mii_nway_restart (struct mii_if_info *mii)
-{
-	int bmcr;
-	int r = -EINVAL;
-
-	/* if autoneg is off, it's an error */
-	bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
-
-	if (bmcr & BMCR_ANENABLE) {
-		bmcr |= BMCR_ANRESTART;
-		mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr);
-		r = 0;
-	}
-
-	return r;
-}
-
-void _kc_mii_check_link (struct mii_if_info *mii)
-{
-	int cur_link = mii_link_ok(mii);
-	int prev_link = netif_carrier_ok(mii->dev);
-
-	if (cur_link && !prev_link)
-		netif_carrier_on(mii->dev);
-	else if (prev_link && !cur_link)
-		netif_carrier_off(mii->dev);
-}
-
-#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) )
-int _kc_generic_mii_ioctl(struct mii_if_info *mii_if,
-                          struct mii_ioctl_data *mii_data, int cmd,
-                          unsigned int *duplex_chg_out)
-{
-	int rc = 0;
-	unsigned int duplex_changed = 0;
-
-	if (duplex_chg_out)
-		*duplex_chg_out = 0;
-
-	mii_data->phy_id &= mii_if->phy_id_mask;
-	mii_data->reg_num &= mii_if->reg_num_mask;
-
-	switch(cmd) {
-	case SIOCDEVPRIVATE:	/* binary compat, remove in 2.5 */
-	case SIOCGMIIPHY:
-		mii_data->phy_id = mii_if->phy_id;
-		/* fall through */
-
-	case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */
-	case SIOCGMIIREG:
-		mii_data->val_out =
-			mii_if->mdio_read(mii_if->dev, mii_data->phy_id,
-					  mii_data->reg_num);
-		break;
-
-	case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */
-	case SIOCSMIIREG: {
-		u16 val = mii_data->val_in;
-
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-
-		if (mii_data->phy_id == mii_if->phy_id) {
-			switch(mii_data->reg_num) {
-			case MII_BMCR: {
-				unsigned int new_duplex = 0;
-				if (val & (BMCR_RESET|BMCR_ANENABLE))
-					mii_if->force_media = 0;
-				else
-					mii_if->force_media = 1;
-				if (mii_if->force_media &&
-				    (val & BMCR_FULLDPLX))
-					new_duplex = 1;
-				if (mii_if->full_duplex != new_duplex) {
-					duplex_changed = 1;
-					mii_if->full_duplex = new_duplex;
-				}
-				break;
-			}
-			case MII_ADVERTISE:
-				mii_if->advertising = val;
-				break;
-			default:
-				/* do nothing */
-				break;
-			}
-		}
-
-		mii_if->mdio_write(mii_if->dev, mii_data->phy_id,
-				   mii_data->reg_num, val);
-		break;
-	}
-
-	default:
-		rc = -EOPNOTSUPP;
-		break;
-	}
-
-	if ((rc == 0) && (duplex_chg_out) && (duplex_changed))
-		*duplex_chg_out = 1;
-
-	return rc;
-}
-#endif /* > 2.4.6 */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
deleted file mode 100644
index 5f297e5b..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-
-"This software program is licensed subject to the GNU General Public License 
-(GPL). Version 2, June 1991, available at 
-<http://www.fsf.org/copyleft/gpl.html>"
-
-GNU General Public License 
-
-Version 2, June 1991
-
-Copyright (C) 1989, 1991 Free Software Foundation, Inc.  
-59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-
-Preamble
-
-The licenses for most software are designed to take away your freedom to 
-share and change it. By contrast, the GNU General Public License is intended
-to guarantee your freedom to share and change free software--to make sure 
-the software is free for all its users. This General Public License applies 
-to most of the Free Software Foundation's software and to any other program 
-whose authors commit to using it. (Some other Free Software Foundation 
-software is covered by the GNU Library General Public License instead.) You 
-can apply it to your programs, too.
-
-When we speak of free software, we are referring to freedom, not price. Our
-General Public Licenses are designed to make sure that you have the freedom 
-to distribute copies of free software (and charge for this service if you 
-wish), that you receive source code or can get it if you want it, that you 
-can change the software or use pieces of it in new free programs; and that 
-you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid anyone to 
-deny you these rights or to ask you to surrender the rights. These 
-restrictions translate to certain responsibilities for you if you distribute
-copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether gratis or 
-for a fee, you must give the recipients all the rights that you have. You 
-must make sure that they, too, receive or can get the source code. And you 
-must show them these terms so they know their rights.
- 
-We protect your rights with two steps: (1) copyright the software, and (2) 
-offer you this license which gives you legal permission to copy, distribute 
-and/or modify the software. 
-
-Also, for each author's protection and ours, we want to make certain that 
-everyone understands that there is no warranty for this free software. If 
-the software is modified by someone else and passed on, we want its 
-recipients to know that what they have is not the original, so that any 
-problems introduced by others will not reflect on the original authors' 
-reputations. 
-
-Finally, any free program is threatened constantly by software patents. We 
-wish to avoid the danger that redistributors of a free program will 
-individually obtain patent licenses, in effect making the program 
-proprietary. To prevent this, we have made it clear that any patent must be 
-licensed for everyone's free use or not licensed at all. 
-
-The precise terms and conditions for copying, distribution and modification 
-follow. 
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a notice
-   placed by the copyright holder saying it may be distributed under the 
-   terms of this General Public License. The "Program", below, refers to any
-   such program or work, and a "work based on the Program" means either the 
-   Program or any derivative work under copyright law: that is to say, a 
-   work containing the Program or a portion of it, either verbatim or with 
-   modifications and/or translated into another language. (Hereinafter, 
-   translation is included without limitation in the term "modification".) 
-   Each licensee is addressed as "you". 
-
-   Activities other than copying, distribution and modification are not 
-   covered by this License; they are outside its scope. The act of running 
-   the Program is not restricted, and the output from the Program is covered 
-   only if its contents constitute a work based on the Program (independent 
-   of having been made by running the Program). Whether that is true depends
-   on what the Program does. 
-
-1. You may copy and distribute verbatim copies of the Program's source code 
-   as you receive it, in any medium, provided that you conspicuously and 
-   appropriately publish on each copy an appropriate copyright notice and 
-   disclaimer of warranty; keep intact all the notices that refer to this 
-   License and to the absence of any warranty; and give any other recipients 
-   of the Program a copy of this License along with the Program. 
-
-   You may charge a fee for the physical act of transferring a copy, and you 
-   may at your option offer warranty protection in exchange for a fee. 
-
-2. You may modify your copy or copies of the Program or any portion of it, 
-   thus forming a work based on the Program, and copy and distribute such 
-   modifications or work under the terms of Section 1 above, provided that 
-   you also meet all of these conditions: 
-
-   * a) You must cause the modified files to carry prominent notices stating 
-        that you changed the files and the date of any change. 
-
-   * b) You must cause any work that you distribute or publish, that in 
-        whole or in part contains or is derived from the Program or any part 
-        thereof, to be licensed as a whole at no charge to all third parties
-        under the terms of this License. 
-
-   * c) If the modified program normally reads commands interactively when 
-        run, you must cause it, when started running for such interactive 
-        use in the most ordinary way, to print or display an announcement 
-        including an appropriate copyright notice and a notice that there is
-        no warranty (or else, saying that you provide a warranty) and that 
-        users may redistribute the program under these conditions, and 
-        telling the user how to view a copy of this License. (Exception: if 
-        the Program itself is interactive but does not normally print such 
-        an announcement, your work based on the Program is not required to 
-        print an announcement.) 
-
-   These requirements apply to the modified work as a whole. If identifiable 
-   sections of that work are not derived from the Program, and can be 
-   reasonably considered independent and separate works in themselves, then 
-   this License, and its terms, do not apply to those sections when you 
-   distribute them as separate works. But when you distribute the same 
-   sections as part of a whole which is a work based on the Program, the 
-   distribution of the whole must be on the terms of this License, whose 
-   permissions for other licensees extend to the entire whole, and thus to 
-   each and every part regardless of who wrote it. 
-
-   Thus, it is not the intent of this section to claim rights or contest 
-   your rights to work written entirely by you; rather, the intent is to 
-   exercise the right to control the distribution of derivative or 
-   collective works based on the Program. 
-
-   In addition, mere aggregation of another work not based on the Program 
-   with the Program (or with a work based on the Program) on a volume of a 
-   storage or distribution medium does not bring the other work under the 
-   scope of this License. 
-
-3. You may copy and distribute the Program (or a work based on it, under 
-   Section 2) in object code or executable form under the terms of Sections 
-   1 and 2 above provided that you also do one of the following: 
-
-   * a) Accompany it with the complete corresponding machine-readable source 
-        code, which must be distributed under the terms of Sections 1 and 2 
-        above on a medium customarily used for software interchange; or, 
-
-   * b) Accompany it with a written offer, valid for at least three years, 
-        to give any third party, for a charge no more than your cost of 
-        physically performing source distribution, a complete machine-
-        readable copy of the corresponding source code, to be distributed 
-        under the terms of Sections 1 and 2 above on a medium customarily 
-        used for software interchange; or, 
-
-   * c) Accompany it with the information you received as to the offer to 
-        distribute corresponding source code. (This alternative is allowed 
-        only for noncommercial distribution and only if you received the 
-        program in object code or executable form with such an offer, in 
-        accord with Subsection b above.) 
-
-   The source code for a work means the preferred form of the work for 
-   making modifications to it. For an executable work, complete source code 
-   means all the source code for all modules it contains, plus any 
-   associated interface definition files, plus the scripts used to control 
-   compilation and installation of the executable. However, as a special 
-   exception, the source code distributed need not include anything that is 
-   normally distributed (in either source or binary form) with the major 
-   components (compiler, kernel, and so on) of the operating system on which
-   the executable runs, unless that component itself accompanies the 
-   executable. 
-
-   If distribution of executable or object code is made by offering access 
-   to copy from a designated place, then offering equivalent access to copy 
-   the source code from the same place counts as distribution of the source 
-   code, even though third parties are not compelled to copy the source 
-   along with the object code. 
-
-4. You may not copy, modify, sublicense, or distribute the Program except as
-   expressly provided under this License. Any attempt otherwise to copy, 
-   modify, sublicense or distribute the Program is void, and will 
-   automatically terminate your rights under this License. However, parties 
-   who have received copies, or rights, from you under this License will not
-   have their licenses terminated so long as such parties remain in full 
-   compliance. 
-
-5. You are not required to accept this License, since you have not signed 
-   it. However, nothing else grants you permission to modify or distribute 
-   the Program or its derivative works. These actions are prohibited by law 
-   if you do not accept this License. Therefore, by modifying or 
-   distributing the Program (or any work based on the Program), you 
-   indicate your acceptance of this License to do so, and all its terms and
-   conditions for copying, distributing or modifying the Program or works 
-   based on it. 
-
-6. Each time you redistribute the Program (or any work based on the 
-   Program), the recipient automatically receives a license from the 
-   original licensor to copy, distribute or modify the Program subject to 
-   these terms and conditions. You may not impose any further restrictions 
-   on the recipients' exercise of the rights granted herein. You are not 
-   responsible for enforcing compliance by third parties to this License. 
-
-7. If, as a consequence of a court judgment or allegation of patent 
-   infringement or for any other reason (not limited to patent issues), 
-   conditions are imposed on you (whether by court order, agreement or 
-   otherwise) that contradict the conditions of this License, they do not 
-   excuse you from the conditions of this License. If you cannot distribute 
-   so as to satisfy simultaneously your obligations under this License and 
-   any other pertinent obligations, then as a consequence you may not 
-   distribute the Program at all. For example, if a patent license would 
-   not permit royalty-free redistribution of the Program by all those who 
-   receive copies directly or indirectly through you, then the only way you 
-   could satisfy both it and this License would be to refrain entirely from 
-   distribution of the Program. 
-
-   If any portion of this section is held invalid or unenforceable under any
-   particular circumstance, the balance of the section is intended to apply
-   and the section as a whole is intended to apply in other circumstances. 
-
-   It is not the purpose of this section to induce you to infringe any 
-   patents or other property right claims or to contest validity of any 
-   such claims; this section has the sole purpose of protecting the 
-   integrity of the free software distribution system, which is implemented 
-   by public license practices. Many people have made generous contributions
-   to the wide range of software distributed through that system in 
-   reliance on consistent application of that system; it is up to the 
-   author/donor to decide if he or she is willing to distribute software 
-   through any other system and a licensee cannot impose that choice. 
-
-   This section is intended to make thoroughly clear what is believed to be 
-   a consequence of the rest of this License. 
-
-8. If the distribution and/or use of the Program is restricted in certain 
-   countries either by patents or by copyrighted interfaces, the original 
-   copyright holder who places the Program under this License may add an 
-   explicit geographical distribution limitation excluding those countries, 
-   so that distribution is permitted only in or among countries not thus 
-   excluded. In such case, this License incorporates the limitation as if 
-   written in the body of this License. 
-
-9. The Free Software Foundation may publish revised and/or new versions of 
-   the General Public License from time to time. Such new versions will be 
-   similar in spirit to the present version, but may differ in detail to 
-   address new problems or concerns. 
-
-   Each version is given a distinguishing version number. If the Program 
-   specifies a version number of this License which applies to it and "any 
-   later version", you have the option of following the terms and 
-   conditions either of that version or of any later version published by 
-   the Free Software Foundation. If the Program does not specify a version 
-   number of this License, you may choose any version ever published by the 
-   Free Software Foundation. 
-
-10. If you wish to incorporate parts of the Program into other free programs
-    whose distribution conditions are different, write to the author to ask 
-    for permission. For software which is copyrighted by the Free Software 
-    Foundation, write to the Free Software Foundation; we sometimes make 
-    exceptions for this. Our decision will be guided by the two goals of 
-    preserving the free status of all derivatives of our free software and 
-    of promoting the sharing and reuse of software generally. 
-
-   NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 
-    FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 
-    OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 
-    PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 
-    EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 
-    ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH 
-    YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL 
-    NECESSARY SERVICING, REPAIR OR CORRECTION. 
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 
-    WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 
-    REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR 
-    DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL 
-    DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM 
-    (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED 
-    INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF 
-    THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR 
-    OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 
-
-END OF TERMS AND CONDITIONS
-
-How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest 
-possible use to the public, the best way to achieve this is to make it free 
-software which everyone can redistribute and change under these terms. 
-
-To do so, attach the following notices to the program. It is safest to 
-attach them to the start of each source file to most effectively convey the
-exclusion of warranty; and each file should have at least the "copyright" 
-line and a pointer to where the full notice is found. 
-
-one line to give the program's name and an idea of what it does.
-Copyright (C) yyyy  name of author
-
-This program is free software; you can redistribute it and/or modify it 
-under the terms of the GNU General Public License as published by the Free 
-Software Foundation; either version 2 of the License, or (at your option) 
-any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT 
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 
-Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-Also add information on how to contact you by electronic and paper mail. 
-
-If the program is interactive, make it output a short notice like this when 
-it starts in an interactive mode: 
-
-Gnomovision version 69, Copyright (C) year name of author Gnomovision comes 
-with ABSOLUTELY NO WARRANTY; for details type 'show w'.  This is free 
-software, and you are welcome to redistribute it under certain conditions; 
-type 'show c' for details.
-
-The hypothetical commands 'show w' and 'show c' should show the appropriate 
-parts of the General Public License. Of course, the commands you use may be 
-called something other than 'show w' and 'show c'; they could even be 
-mouse-clicks or menu items--whatever suits your program. 
-
-You should also get your employer (if you work as a programmer) or your 
-school, if any, to sign a "copyright disclaimer" for the program, if 
-necessary. Here is a sample; alter the names: 
-
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program 
-'Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-signature of Ty Coon, 1 April 1989
-Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into 
-proprietary programs. If your program is a subroutine library, you may 
-consider it more useful to permit linking proprietary applications with the 
-library. If this is what you want to do, use the GNU Library General Public 
-License instead of this License.
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
index 222c2c71..59415469 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
index 24015844..e17b7f18 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
index c6abb020..00a584f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
index c6f4130d..30de47eb 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
index 02be92ab..41024400 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
index ef7ce629..f00fe796 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
index a6ab30d2..98b74000 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
index 93659ca0..88b33fa0 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
index 9bd6f534..6ae5926f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
index a6690451..5e6f9ac9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
index 11472bd3..bc3cb2f4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
index cad28622..48f7dcfc 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
index 238028d0..d26016c9 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
index 124f00de..5ced84f8 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
index d161600b..c6f8e21f 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
index e3f5275e..234fa632 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
index bbe5a9e3..5ae171ac 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
deleted file mode 100644
index 5e3559fd..00000000
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*******************************************************************************
-
-  Intel 10 Gigabit PCI Express Linux driver
-  Copyright(c) 1999 - 2012 Intel Corporation.
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Contact Information:
-  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
-*******************************************************************************/
-
-
-#ifndef _IXGBE_SRIOV_H_
-#define _IXGBE_SRIOV_H_
-
-int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
-			    int entries, u16 *hash_list, u32 vf);
-void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf);
-void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe);
-void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf);
-void ixgbe_msg_task(struct ixgbe_adapter *adapter);
-int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
-		     int vf, unsigned char *mac_addr);
-void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
-void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
-			  u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
-#ifdef HAVE_VF_SPOOFCHK_CONFIGURE
-int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
-#endif
-int ixgbe_ndo_get_vf_config(struct net_device *netdev,
-			    int vf, struct ifla_vf_info *ivi);
-#endif
-void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
-#ifdef CONFIG_PCI_IOV
-int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
-void ixgbe_enable_sriov(struct ixgbe_adapter *adapter);
-#endif
-int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
-#ifdef IFLA_VF_MAX
-void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
-#endif /* IFLA_VF_MAX */
-void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
-
-/*
- * These are defined in ixgbe_type.h on behalf of the VF driver
- * but we need them here unwrapped for the PF driver.
- */
-#define IXGBE_DEV_ID_82599_VF			0x10ED
-#define IXGBE_DEV_ID_X540_VF			0x1515
-
-#endif /* _IXGBE_SRIOV_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
index 6b21c879..bda61fa4 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
index b99d9e84..2affe242 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
index 77e8952d..38bcc87b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
index 5f2523ed..d84c7ccb 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
index bf27579b..4c7a6408 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h
@@ -17,7 +17,7 @@
   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 
   The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
+  the file called "LICENSE.GPL".
 
   Contact Information:
   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
@@ -3140,4 +3140,16 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
 #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
 #endif /* >= 3.16.0 */
 
+/*
+ * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
+ * Older kernels that backported this commit must use the renamed macros.
+ * This fix is specific to RedHat/CentOS kernels.
+ */
+#if (defined(RHEL_RELEASE_CODE) && \
+	RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 8) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))
+#define vlan_tx_tag_get skb_vlan_tag_get
+#define vlan_tx_tag_present skb_vlan_tag_present
+#endif
+
 #endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
index a0e5cb6b..58cbadd3 100644
--- a/lib/librte_eal/linuxapp/kni/kni_dev.h
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -25,6 +25,11 @@
 #ifndef _KNI_DEV_H_
 #define _KNI_DEV_H_
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/if.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
@@ -39,10 +44,11 @@
 #include <exec-env/rte_kni_common.h>
 #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
 
+#define MBUF_BURST_SZ 32
+
 /**
  * A structure describing the private information for a kni device.
  */
-
 struct kni_dev {
 	/* kni list */
 	struct list_head list;
@@ -50,7 +56,7 @@ struct kni_dev {
 	struct net_device_stats stats;
 	int status;
 	uint16_t group_id;           /* Group ID of a group of KNI devices */
-	unsigned core_id;            /* Core ID to bind */
+	uint32_t core_id;            /* Core ID to bind */
 	char name[RTE_KNI_NAMESIZE]; /* Network device name */
 	struct task_struct *pthread;
 
@@ -84,38 +90,36 @@ struct kni_dev {
 	/* response queue */
 	void *resp_q;
 
-	void * sync_kva;
+	void *sync_kva;
 	void *sync_va;
 
 	void *mbuf_kva;
 	void *mbuf_va;
 
 	/* mbuf size */
-	unsigned mbuf_size;
+	uint32_t mbuf_size;
 
 	/* synchro for request processing */
 	unsigned long synchro;
 
 #ifdef RTE_KNI_VHOST
-	struct kni_vhost_queue* vhost_queue;
+	struct kni_vhost_queue *vhost_queue;
+
 	volatile enum {
 		BE_STOP = 0x1,
 		BE_START = 0x2,
 		BE_FINISH = 0x4,
-	}vq_status;
+	} vq_status;
 #endif
+	/* buffers */
+	void *pa[MBUF_BURST_SZ];
+	void *va[MBUF_BURST_SZ];
+	void *alloc_pa[MBUF_BURST_SZ];
+	void *alloc_va[MBUF_BURST_SZ];
 };
 
-#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args)
-#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args)
-#ifdef RTE_KNI_KO_DEBUG
-	#define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args)
-#else
-	#define KNI_DBG(args...)
-#endif
-
 #ifdef RTE_KNI_VHOST
-unsigned int
+uint32_t
 kni_poll(struct file *file, struct socket *sock, poll_table * wait);
 int kni_chk_vhost_rx(struct kni_dev *kni);
 int kni_vhost_init(struct kni_dev *kni);
@@ -127,23 +131,22 @@ struct kni_vhost_queue {
 	int vnet_hdr_sz;
 	struct kni_dev *kni;
 	int sockfd;
-	unsigned int flags;
-	struct sk_buff* cache;
-	struct rte_kni_fifo* fifo;
+	uint32_t flags;
+	struct sk_buff *cache;
+	struct rte_kni_fifo *fifo;
 };
 
 #endif
 
-#ifdef RTE_KNI_VHOST_DEBUG_RX
-	#define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args)
-#else
-	#define KNI_DBG_RX(args...)
-#endif
+void kni_net_rx(struct kni_dev *kni);
+void kni_net_init(struct net_device *dev);
+void kni_net_config_lo_mode(char *lo_str);
+void kni_net_poll_resp(struct kni_dev *kni);
+void kni_set_ethtool_ops(struct net_device *netdev);
 
-#ifdef RTE_KNI_VHOST_DEBUG_TX
-	#define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args)
-#else
-	#define KNI_DBG_TX(args...)
-#endif
+int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void ixgbe_kni_remove(struct pci_dev *pdev);
+int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
+void igb_kni_remove(struct pci_dev *pdev);
 
 #endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
index 06b6d463..0c88589c 100644
--- a/lib/librte_eal/linuxapp/kni/kni_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c
@@ -31,6 +31,7 @@ static int
 kni_check_if_running(struct net_device *dev)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	if (priv->lad_dev)
 		return 0;
 	else
@@ -41,6 +42,7 @@ static void
 kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
 }
 
@@ -48,6 +50,7 @@ static int
 kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
 }
 
@@ -55,6 +58,7 @@ static int
 kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
 }
 
@@ -62,6 +66,7 @@ static void
 kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol);
 }
 
@@ -69,6 +74,7 @@ static int
 kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol);
 }
 
@@ -76,6 +82,7 @@ static int
 kni_nway_reset(struct net_device *dev)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev);
 }
 
@@ -83,6 +90,7 @@ static int
 kni_get_eeprom_len(struct net_device *dev)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev);
 }
 
@@ -91,6 +99,7 @@ kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
 							u8 *bytes)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom,
 								bytes);
 }
@@ -100,6 +109,7 @@ kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
 							u8 *bytes)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom,
 								bytes);
 }
@@ -108,6 +118,7 @@ static void
 kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring);
 }
 
@@ -115,6 +126,7 @@ static int
 kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring);
 }
 
@@ -122,6 +134,7 @@ static void
 kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause);
 }
 
@@ -129,6 +142,7 @@ static int
 kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev,
 								pause);
 }
@@ -137,6 +151,7 @@ static u32
 kni_get_msglevel(struct net_device *dev)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev);
 }
 
@@ -144,6 +159,7 @@ static void
 kni_set_msglevel(struct net_device *dev, u32 data)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data);
 }
 
@@ -151,6 +167,7 @@ static int
 kni_get_regs_len(struct net_device *dev)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev);
 }
 
@@ -158,6 +175,7 @@ static void
 kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p);
 }
 
@@ -165,6 +183,7 @@ static void
 kni_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset,
 								data);
 }
@@ -173,6 +192,7 @@ static int
 kni_get_sset_count(struct net_device *dev, int sset)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset);
 }
 
@@ -181,24 +201,25 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
 								u64 *data)
 {
 	struct kni_dev *priv = netdev_priv(dev);
+
 	priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats,
 								data);
 }
 
 struct ethtool_ops kni_ethtool_ops = {
-	.begin 				= kni_check_if_running,
+	.begin			= kni_check_if_running,
 	.get_drvinfo		= kni_get_drvinfo,
 	.get_settings		= kni_get_settings,
 	.set_settings		= kni_set_settings,
 	.get_regs_len		= kni_get_regs_len,
-	.get_regs			= kni_get_regs,
-	.get_wol			= kni_get_wol,
-	.set_wol			= kni_set_wol,
-	.nway_reset			= kni_nway_reset,
-	.get_link			= ethtool_op_get_link,
+	.get_regs		= kni_get_regs,
+	.get_wol		= kni_get_wol,
+	.set_wol		= kni_set_wol,
+	.nway_reset		= kni_nway_reset,
+	.get_link		= ethtool_op_get_link,
 	.get_eeprom_len		= kni_get_eeprom_len,
-	.get_eeprom			= kni_get_eeprom,
-	.set_eeprom			= kni_set_eeprom,
+	.get_eeprom		= kni_get_eeprom,
+	.set_eeprom		= kni_set_eeprom,
 	.get_ringparam		= kni_get_ringparam,
 	.set_ringparam		= kni_set_ringparam,
 	.get_pauseparam		= kni_get_pauseparam,
@@ -207,7 +228,7 @@ struct ethtool_ops kni_ethtool_ops = {
 	.set_msglevel		= kni_set_msglevel,
 	.get_strings		= kni_get_strings,
 	.get_sset_count		= kni_get_sset_count,
-	.get_ethtool_stats  = kni_get_ethtool_stats,
+	.get_ethtool_stats	= kni_get_ethtool_stats,
 };
 
 void
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
index 3ea750e2..025ec1c9 100644
--- a/lib/librte_eal/linuxapp/kni/kni_fifo.h
+++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h
@@ -30,13 +30,13 @@
 /**
  * Adds num elements into the fifo. Return the number actually written
  */
-static inline unsigned
-kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
-	unsigned i = 0;
-	unsigned fifo_write = fifo->write;
-	unsigned fifo_read = fifo->read;
-	unsigned new_write = fifo_write;
+	uint32_t i = 0;
+	uint32_t fifo_write = fifo->write;
+	uint32_t fifo_read = fifo->read;
+	uint32_t new_write = fifo_write;
 
 	for (i = 0; i < num; i++) {
 		new_write = (new_write + 1) & (fifo->len - 1);
@@ -54,12 +54,12 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
 /**
  * Get up to num elements from the fifo. Return the number actully read
  */
-static inline unsigned
-kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
+static inline uint32_t
+kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
-	unsigned i = 0;
-	unsigned new_read = fifo->read;
-	unsigned fifo_write = fifo->write;
+	uint32_t i = 0;
+	uint32_t new_read = fifo->read;
+	uint32_t fifo_write = fifo->write;
 
 	for (i = 0; i < num; i++) {
 		if (new_read == fifo_write)
@@ -76,16 +76,16 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
 /**
  * Get the num of elements in the fifo
  */
-static inline unsigned
+static inline uint32_t
 kni_fifo_count(struct rte_kni_fifo *fifo)
 {
-	return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1);
+	return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
 }
 
 /**
  * Get the num of available elements in the fifo
  */
-static inline unsigned
+static inline uint32_t
 kni_fifo_free_count(struct rte_kni_fifo *fifo)
 {
 	return (fifo->read - fifo->write - 1) & (fifo->len - 1);
@@ -96,7 +96,7 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo)
  * Initializes the kni fifo structure
  */
 static inline void
-kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size)
+kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size)
 {
 	fifo->write = 0;
 	fifo->read = 0;
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca6..497db9bd 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/rwsem.h>
+#include <linux/mutex.h>
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -47,52 +48,15 @@ MODULE_DESCRIPTION("Kernel Module for managing kni devices");
 
 #define KNI_MAX_DEVICES 32
 
-extern void kni_net_rx(struct kni_dev *kni);
-extern void kni_net_init(struct net_device *dev);
-extern void kni_net_config_lo_mode(char *lo_str);
-extern void kni_net_poll_resp(struct kni_dev *kni);
-extern void kni_set_ethtool_ops(struct net_device *netdev);
-
-extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void ixgbe_kni_remove(struct pci_dev *pdev);
-extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
-extern void igb_kni_remove(struct pci_dev *pdev);
-
-static int kni_open(struct inode *inode, struct file *file);
-static int kni_release(struct inode *inode, struct file *file);
-static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
-					unsigned long ioctl_param);
-static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
-						unsigned long ioctl_param);
-static int kni_dev_remove(struct kni_dev *dev);
-
-static int __init kni_parse_kthread_mode(void);
-
-/* KNI processing for single kernel thread mode */
-static int kni_thread_single(void *unused);
-/* KNI processing for multiple kernel thread mode */
-static int kni_thread_multiple(void *param);
-
-static struct file_operations kni_fops = {
-	.owner = THIS_MODULE,
-	.open = kni_open,
-	.release = kni_release,
-	.unlocked_ioctl = (void *)kni_ioctl,
-	.compat_ioctl = (void *)kni_compat_ioctl,
-};
-
-static struct miscdevice kni_misc = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = KNI_DEVICE,
-	.fops = &kni_fops,
-};
+extern const struct pci_device_id ixgbe_pci_tbl[];
+extern const struct pci_device_id igb_pci_tbl[];
 
 /* loopback mode */
-static char *lo_mode = NULL;
+static char *lo_mode;
 
 /* Kernel thread mode */
-static char *kthread_mode = NULL;
-static unsigned multiple_kthread_on = 0;
+static char *kthread_mode;
+static uint32_t multiple_kthread_on;
 
 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
 
@@ -100,20 +64,24 @@ static int kni_net_id;
 
 struct kni_net {
 	unsigned long device_in_use; /* device in use flag */
+	struct mutex kni_kthread_lock;
 	struct task_struct *kni_kthread;
 	struct rw_semaphore kni_list_lock;
 	struct list_head kni_list_head;
 };
 
-static int __net_init kni_init_net(struct net *net)
+static int __net_init
+kni_init_net(struct net *net)
 {
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	struct kni_net *knet = net_generic(net, kni_net_id);
+
+	memset(knet, 0, sizeof(*knet));
 #else
 	struct kni_net *knet;
 	int ret;
 
-	knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+	knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
 	if (!knet) {
 		ret = -ENOMEM;
 		return ret;
@@ -123,6 +91,8 @@ static int __net_init kni_init_net(struct net *net)
 	/* Clear the bit of device in use */
 	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
 
+	mutex_init(&knet->kni_kthread_lock);
+
 	init_rwsem(&knet->kni_list_lock);
 	INIT_LIST_HEAD(&knet->kni_list_head);
 
@@ -137,11 +107,15 @@ static int __net_init kni_init_net(struct net *net)
 #endif
 }
 
-static void __net_exit kni_exit_net(struct net *net)
+static void __net_exit
+kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-	struct kni_net *knet = net_generic(net, kni_net_id);
+	struct kni_net *knet __maybe_unused;
+
+	knet = net_generic(net, kni_net_id);
+	mutex_destroy(&knet->kni_kthread_lock);
 
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
 	kfree(knet);
 #endif
 }
@@ -155,72 +129,56 @@ static struct pernet_operations kni_net_ops = {
 #endif
 };
 
-static int __init
-kni_init(void)
+static int
+kni_thread_single(void *data)
 {
-	int rc;
-
-	KNI_PRINT("######## DPDK kni module loading ########\n");
-
-	if (kni_parse_kthread_mode() < 0) {
-		KNI_ERR("Invalid parameter for kthread_mode\n");
-		return -EINVAL;
-	}
+	struct kni_net *knet = data;
+	int j;
+	struct kni_dev *dev;
 
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-	rc = register_pernet_subsys(&kni_net_ops);
+	while (!kthread_should_stop()) {
+		down_read(&knet->kni_list_lock);
+		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+			list_for_each_entry(dev, &knet->kni_list_head, list) {
+#ifdef RTE_KNI_VHOST
+				kni_chk_vhost_rx(dev);
 #else
-	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+				kni_net_rx(dev);
+#endif
+				kni_net_poll_resp(dev);
+			}
+		}
+		up_read(&knet->kni_list_lock);
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		/* reschedule out for a while */
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
 #endif
-	if (rc)
-		return -EPERM;
-
-	rc = misc_register(&kni_misc);
-	if (rc != 0) {
-		KNI_ERR("Misc registration failed\n");
-		goto out;
 	}
 
-	/* Configure the lo mode according to the input parameter */
-	kni_net_config_lo_mode(lo_mode);
-
-	KNI_PRINT("######## DPDK kni module loaded  ########\n");
-
 	return 0;
-
-out:
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-	unregister_pernet_subsys(&kni_net_ops);
-#else
-	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
-#endif
-	return rc;
 }
 
-static void __exit
-kni_exit(void)
+static int
+kni_thread_multiple(void *param)
 {
-	misc_deregister(&kni_misc);
-#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
-	unregister_pernet_subsys(&kni_net_ops);
+	int j;
+	struct kni_dev *dev = (struct kni_dev *)param;
+
+	while (!kthread_should_stop()) {
+		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
+#ifdef RTE_KNI_VHOST
+			kni_chk_vhost_rx(dev);
 #else
-	register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+			kni_net_rx(dev);
 #endif
-	KNI_PRINT("####### DPDK kni module unloaded  #######\n");
-}
-
-static int __init
-kni_parse_kthread_mode(void)
-{
-	if (!kthread_mode)
-		return 0;
-
-	if (strcmp(kthread_mode, "single") == 0)
-		return 0;
-	else if (strcmp(kthread_mode, "multiple") == 0)
-		multiple_kthread_on = 1;
-	else
-		return -1;
+			kni_net_poll_resp(dev);
+		}
+#ifdef RTE_KNI_PREEMPT_DEFAULT
+		schedule_timeout_interruptible(
+			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
+#endif
+	}
 
 	return 0;
 }
@@ -235,21 +193,29 @@ kni_open(struct inode *inode, struct file *file)
 	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
 		return -EBUSY;
 
-	/* Create kernel thread for single mode */
-	if (multiple_kthread_on == 0) {
-		KNI_PRINT("Single kernel thread for all KNI devices\n");
-		/* Create kernel thread for RX */
-		knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-						"kni_single");
-		if (IS_ERR(knet->kni_kthread)) {
-			KNI_ERR("Unable to create kernel threaed\n");
-			return PTR_ERR(knet->kni_kthread);
-		}
-	} else
-		KNI_PRINT("Multiple kernel thread mode enabled\n");
-
 	file->private_data = get_net(net);
-	KNI_PRINT("/dev/kni opened\n");
+	pr_debug("/dev/kni opened\n");
+
+	return 0;
+}
+
+static int
+kni_dev_remove(struct kni_dev *dev)
+{
+	if (!dev)
+		return -ENODEV;
+
+	if (dev->pci_dev) {
+		if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
+			ixgbe_kni_remove(dev->pci_dev);
+		else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
+			igb_kni_remove(dev->pci_dev);
+	}
+
+	if (dev->net_dev) {
+		unregister_netdev(dev->net_dev);
+		free_netdev(dev->net_dev);
+	}
 
 	return 0;
 }
@@ -263,9 +229,13 @@ kni_release(struct inode *inode, struct file *file)
 
 	/* Stop kernel thread for single mode */
 	if (multiple_kthread_on == 0) {
+		mutex_lock(&knet->kni_kthread_lock);
 		/* Stop kernel thread */
-		kthread_stop(knet->kni_kthread);
-		knet->kni_kthread = NULL;
+		if (knet->kni_kthread != NULL) {
+			kthread_stop(knet->kni_kthread);
+			knet->kni_kthread = NULL;
+		}
+		mutex_unlock(&knet->kni_kthread_lock);
 	}
 
 	down_write(&knet->kni_list_lock);
@@ -288,110 +258,70 @@ kni_release(struct inode *inode, struct file *file)
 	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
 
 	put_net(net);
-	KNI_PRINT("/dev/kni closed\n");
+	pr_debug("/dev/kni closed\n");
 
 	return 0;
 }
 
 static int
-kni_thread_single(void *data)
+kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
 {
-	struct kni_net *knet = data;
-	int j;
-	struct kni_dev *dev;
+	if (!kni || !dev)
+		return -1;
 
-	while (!kthread_should_stop()) {
-		down_read(&knet->kni_list_lock);
-		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-			list_for_each_entry(dev, &knet->kni_list_head, list) {
-#ifdef RTE_KNI_VHOST
-				kni_chk_vhost_rx(dev);
-#else
-				kni_net_rx(dev);
-#endif
-				kni_net_poll_resp(dev);
-			}
-		}
-		up_read(&knet->kni_list_lock);
-#ifdef RTE_KNI_PREEMPT_DEFAULT
-		/* reschedule out for a while */
-		schedule_timeout_interruptible(usecs_to_jiffies( \
-				KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
+	/* Check if network name has been used */
+	if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
+		pr_err("KNI name %s duplicated\n", dev->name);
+		return -1;
 	}
 
 	return 0;
 }
 
 static int
-kni_thread_multiple(void *param)
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
 {
-	int j;
-	struct kni_dev *dev = (struct kni_dev *)param;
-
-	while (!kthread_should_stop()) {
-		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-#ifdef RTE_KNI_VHOST
-			kni_chk_vhost_rx(dev);
-#else
-			kni_net_rx(dev);
-#endif
-			kni_net_poll_resp(dev);
+	/**
+	 * Create a new kernel thread for multiple mode, set its core affinity,
+	 * and finally wake it up.
+	 */
+	if (multiple_kthread_on) {
+		kni->pthread = kthread_create(kni_thread_multiple,
+			(void *)kni, "kni_%s", kni->name);
+		if (IS_ERR(kni->pthread)) {
+			kni_dev_remove(kni);
+			return -ECANCELED;
 		}
-#ifdef RTE_KNI_PREEMPT_DEFAULT
-		schedule_timeout_interruptible(usecs_to_jiffies( \
-				KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
-	}
-
-	return 0;
-}
-
-static int
-kni_dev_remove(struct kni_dev *dev)
-{
-	if (!dev)
-		return -ENODEV;
-
-	switch (dev->device_id) {
-	#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
-	#include <rte_pci_dev_ids.h>
-		igb_kni_remove(dev->pci_dev);
-		break;
-	#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
-	#include <rte_pci_dev_ids.h>
-		ixgbe_kni_remove(dev->pci_dev);
-		break;
-	default:
-		break;
-	}
-
-	if (dev->net_dev) {
-		unregister_netdev(dev->net_dev);
-		free_netdev(dev->net_dev);
-	}
 
-	return 0;
-}
+		if (force_bind)
+			kthread_bind(kni->pthread, kni->core_id);
+		wake_up_process(kni->pthread);
+	} else {
+		mutex_lock(&knet->kni_kthread_lock);
+
+		if (knet->kni_kthread == NULL) {
+			knet->kni_kthread = kthread_create(kni_thread_single,
+				(void *)knet, "kni_single");
+			if (IS_ERR(knet->kni_kthread)) {
+				mutex_unlock(&knet->kni_kthread_lock);
+				kni_dev_remove(kni);
+				return -ECANCELED;
+			}
 
-static int
-kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
-{
-	if (!kni || !dev)
-		return -1;
+			if (force_bind)
+				kthread_bind(knet->kni_kthread, kni->core_id);
+			wake_up_process(knet->kni_kthread);
+		}
 
-	/* Check if network name has been used */
-	if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
-		KNI_ERR("KNI name %s duplicated\n", dev->name);
-		return -1;
+		mutex_unlock(&knet->kni_kthread_lock);
 	}
 
 	return 0;
 }
 
 static int
-kni_ioctl_create(struct net *net,
-		unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_create(struct net *net, uint32_t ioctl_num,
+		unsigned long ioctl_param)
 {
 	struct kni_net *knet = net_generic(net, kni_net_id);
 	int ret;
@@ -402,7 +332,7 @@ kni_ioctl_create(struct net *net,
 	struct net_device *lad_dev = NULL;
 	struct kni_dev *kni, *dev, *n;
 
-	printk(KERN_INFO "KNI: Creating kni...\n");
+	pr_info("Creating kni...\n");
 	/* Check the buffer size, to avoid warning */
 	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
 		return -EINVAL;
@@ -410,17 +340,15 @@ kni_ioctl_create(struct net *net,
 	/* Copy kni info from user space */
 	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
 	if (ret) {
-		KNI_ERR("copy_from_user in kni_ioctl_create");
+		pr_err("copy_from_user in kni_ioctl_create");
 		return -EIO;
 	}
 
 	/**
-	 * Check if the cpu core id is valid for binding,
-	 * for multiple kernel thread mode.
+	 * Check if the cpu core id is valid for binding.
 	 */
-	if (multiple_kthread_on && dev_info.force_bind &&
-				!cpu_online(dev_info.core_id)) {
-		KNI_ERR("cpu %u is not online\n", dev_info.core_id);
+	if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
+		pr_err("cpu %u is not online\n", dev_info.core_id);
 		return -EINVAL;
 	}
 
@@ -440,7 +368,7 @@ kni_ioctl_create(struct net *net,
 #endif
 							kni_net_init);
 	if (net_dev == NULL) {
-		KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
+		pr_err("error allocating device \"%s\"\n", dev_info.name);
 		return -EBUSY;
 	}
 
@@ -464,33 +392,27 @@ kni_ioctl_create(struct net *net,
 	kni->sync_va = dev_info.sync_va;
 	kni->sync_kva = phys_to_virt(dev_info.sync_phys);
 
-	kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
-	kni->mbuf_va = dev_info.mbuf_va;
-
 #ifdef RTE_KNI_VHOST
 	kni->vhost_queue = NULL;
 	kni->vq_status = BE_STOP;
 #endif
 	kni->mbuf_size = dev_info.mbuf_size;
 
-	KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
+	pr_debug("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
 		(unsigned long long) dev_info.tx_phys, kni->tx_q);
-	KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
+	pr_debug("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
 		(unsigned long long) dev_info.rx_phys, kni->rx_q);
-	KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
+	pr_debug("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
 		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
-	KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p\n",
+	pr_debug("free_phys:    0x%016llx, free_q addr:    0x%p\n",
 		(unsigned long long) dev_info.free_phys, kni->free_q);
-	KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p\n",
+	pr_debug("req_phys:     0x%016llx, req_q addr:     0x%p\n",
 		(unsigned long long) dev_info.req_phys, kni->req_q);
-	KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
+	pr_debug("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
 		(unsigned long long) dev_info.resp_phys, kni->resp_q);
-	KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
-		(unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
-	KNI_PRINT("mbuf_va:      0x%p\n", dev_info.mbuf_va);
-	KNI_PRINT("mbuf_size:    %u\n", kni->mbuf_size);
+	pr_debug("mbuf_size:    %u\n", kni->mbuf_size);
 
-	KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
+	pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
 					dev_info.bus,
 					dev_info.devid,
 					dev_info.function,
@@ -501,7 +423,7 @@ kni_ioctl_create(struct net *net,
 
 	/* Support Ethtool */
 	while (pci) {
-		KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
+		pr_debug("pci_bus: %02x:%02x:%02x\n",
 					pci->bus->number,
 					PCI_SLOT(pci->devfn),
 					PCI_FUNC(pci->devfn));
@@ -510,28 +432,21 @@ kni_ioctl_create(struct net *net,
 			(PCI_SLOT(pci->devfn) == dev_info.devid) &&
 			(PCI_FUNC(pci->devfn) == dev_info.function)) {
 			found_pci = pci;
-			switch (dev_info.device_id) {
-			#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
-			#include <rte_pci_dev_ids.h>
-				ret = igb_kni_probe(found_pci, &lad_dev);
-				break;
-			#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
-							case (dev):
-			#include <rte_pci_dev_ids.h>
+
+			if (pci_match_id(ixgbe_pci_tbl, found_pci))
 				ret = ixgbe_kni_probe(found_pci, &lad_dev);
-				break;
-			default:
+			else if (pci_match_id(igb_pci_tbl, found_pci))
+				ret = igb_kni_probe(found_pci, &lad_dev);
+			else
 				ret = -1;
-				break;
-			}
 
-			KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
+			pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
 							pci, lad_dev);
 			if (ret == 0) {
 				kni->lad_dev = lad_dev;
 				kni_set_ethtool_ops(kni->net_dev);
 			} else {
-				KNI_ERR("Device not supported by ethtool");
+				pr_err("Device not supported by ethtool");
 				kni->lad_dev = NULL;
 			}
 
@@ -546,7 +461,7 @@ kni_ioctl_create(struct net *net,
 		pci_dev_put(pci);
 
 	if (kni->lad_dev)
-		memcpy(net_dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
+		ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
 	else
 		/*
 		 * Generate random mac address. eth_random_addr() is the newer
@@ -556,9 +471,11 @@ kni_ioctl_create(struct net *net,
 
 	ret = register_netdev(net_dev);
 	if (ret) {
-		KNI_ERR("error %i registering device \"%s\"\n",
+		pr_err("error %i registering device \"%s\"\n",
 					ret, dev_info.name);
+		kni->net_dev = NULL;
 		kni_dev_remove(kni);
+		free_netdev(net_dev);
 		return -ENODEV;
 	}
 
@@ -566,22 +483,9 @@ kni_ioctl_create(struct net *net,
 	kni_vhost_init(kni);
 #endif
 
-	/**
-	 * Create a new kernel thread for multiple mode, set its core affinity,
-	 * and finally wake it up.
-	 */
-	if (multiple_kthread_on) {
-		kni->pthread = kthread_create(kni_thread_multiple,
-					      (void *)kni,
-					      "kni_%s", kni->name);
-		if (IS_ERR(kni->pthread)) {
-			kni_dev_remove(kni);
-			return -ECANCELED;
-		}
-		if (dev_info.force_bind)
-			kthread_bind(kni->pthread, kni->core_id);
-		wake_up_process(kni->pthread);
-	}
+	ret = kni_run_thread(knet, kni, dev_info.force_bind);
+	if (ret != 0)
+		return ret;
 
 	down_write(&knet->kni_list_lock);
 	list_add(&kni->list, &knet->kni_list_head);
@@ -591,8 +495,8 @@ kni_ioctl_create(struct net *net,
 }
 
 static int
-kni_ioctl_release(struct net *net,
-		unsigned int ioctl_num, unsigned long ioctl_param)
+kni_ioctl_release(struct net *net, uint32_t ioctl_num,
+		unsigned long ioctl_param)
 {
 	struct kni_net *knet = net_generic(net, kni_net_id);
 	int ret = -EINVAL;
@@ -600,11 +504,11 @@ kni_ioctl_release(struct net *net,
 	struct rte_kni_device_info dev_info;
 
 	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
-			return -EINVAL;
+		return -EINVAL;
 
 	ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
 	if (ret) {
-		KNI_ERR("copy_from_user in kni_ioctl_release");
+		pr_err("copy_from_user in kni_ioctl_release");
 		return -EIO;
 	}
 
@@ -631,21 +535,19 @@ kni_ioctl_release(struct net *net,
 		break;
 	}
 	up_write(&knet->kni_list_lock);
-	printk(KERN_INFO "KNI: %s release kni named %s\n",
+	pr_info("%s release kni named %s\n",
 		(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
 
 	return ret;
 }
 
 static int
-kni_ioctl(struct inode *inode,
-	unsigned int ioctl_num,
-	unsigned long ioctl_param)
+kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
 {
 	int ret = -EINVAL;
 	struct net *net = current->nsproxy->net_ns;
 
-	KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+	pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
 
 	/*
 	 * Switch according to the ioctl called
@@ -661,7 +563,7 @@ kni_ioctl(struct inode *inode,
 		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
 		break;
 	default:
-		KNI_DBG("IOCTL default\n");
+		pr_debug("IOCTL default\n");
 		break;
 	}
 
@@ -669,16 +571,99 @@ kni_ioctl(struct inode *inode,
 }
 
 static int
-kni_compat_ioctl(struct inode *inode,
-		unsigned int ioctl_num,
+kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
 		unsigned long ioctl_param)
 {
 	/* 32 bits app on 64 bits OS to be supported later */
-	KNI_PRINT("Not implemented.\n");
+	pr_debug("Not implemented.\n");
 
 	return -EINVAL;
 }
 
+static const struct file_operations kni_fops = {	/* used by kni_misc.fops */
+	.owner = THIS_MODULE,
+	.open = kni_open,
+	.release = kni_release,
+	.unlocked_ioctl = (void *)kni_ioctl,	/* NOTE(review): cast disables prototype checking */
+	.compat_ioctl = (void *)kni_compat_ioctl,	/* stub, always returns -EINVAL */
+};
+
+static struct miscdevice kni_misc = {	/* registered in kni_init(), removed in kni_exit() */
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = KNI_DEVICE,
+	.fops = &kni_fops,
+};
+
+/* Validate kthread_mode: 0 if unset, "single" or "multiple"; -1 otherwise. */
+static int __init
+kni_parse_kthread_mode(void)
+{
+	/* Unset or "single": multiple_kthread_on is left untouched. */
+	if (!kthread_mode || strcmp(kthread_mode, "single") == 0)
+		return 0;
+
+	if (strcmp(kthread_mode, "multiple") != 0)
+		return -1;
+
+	multiple_kthread_on = 1;
+
+	return 0;
+}
+
+/* Module init; returns 0, or a negative errno with registrations undone. */
+static int __init
+kni_init(void)
+{
+	int rc;
+
+	if (kni_parse_kthread_mode() < 0) {
+		pr_err("Invalid parameter for kthread_mode\n");
+		return -EINVAL;
+	}
+
+	if (multiple_kthread_on == 0)
+		pr_debug("Single kernel thread for all KNI devices\n");
+	else
+		pr_debug("Multiple kernel thread mode enabled\n");
+
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	rc = register_pernet_subsys(&kni_net_ops);
+#else
+	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
+#endif
+	if (rc)
+		return rc;	/* keep the real errno, not a blanket -EPERM */
+
+	rc = misc_register(&kni_misc);
+	if (rc != 0) {
+		pr_err("Misc registration failed\n");
+		goto out;
+	}
+
+	/* Configure the lo mode according to the input parameter */
+	kni_net_config_lo_mode(lo_mode);
+	return 0;
+
+out:	/* misc_register() failed: undo the pernet registration made above */
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+	return rc;
+}
+
+static void __exit
+kni_exit(void)
+{
+	misc_deregister(&kni_misc);	/* teardown runs in reverse of kni_init() */
+#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
+	unregister_pernet_subsys(&kni_net_ops);
+#else
+	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
+#endif
+}
+
 module_init(kni_init);
 module_exit(kni_exit);
 
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
index fc82193a..4ac99cfe 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -44,23 +44,103 @@
 
 #define WD_TIMEOUT 5 /*jiffies */
 
-#define MBUF_BURST_SZ 32
-
 #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
 
 /* typedef for rx function */
 typedef void (*kni_net_rx_t)(struct kni_dev *kni);
 
-static int kni_net_tx(struct sk_buff *skb, struct net_device *dev);
 static void kni_net_rx_normal(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo(struct kni_dev *kni);
-static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni);
-static int kni_net_process_request(struct kni_dev *kni,
-			struct rte_kni_request *req);
 
 /* kni rx function pointer, with default to normal rx */
 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
 
+/* physical address to kernel virtual address, via phys_to_virt() */
+static void *
+pa2kva(void *pa)
+{
+	return phys_to_virt((unsigned long)pa);
+}
+
+/*
+ * Physical to virtual address: pa shifted by m's (buf_addr - buf_physaddr).
+ */
+static void *
+pa2va(void *pa, struct rte_kni_mbuf *m)
+{
+	unsigned long phys = (unsigned long)pa;
+
+	return (void *)(phys + (unsigned long)m->buf_addr -
+			(unsigned long)m->buf_physaddr);
+}
+
+/* kernel virtual address of the data at buf_physaddr + data_off of mbuf m */
+static void *
+kva2data_kva(struct rte_kni_mbuf *m)
+{
+	return phys_to_virt(m->buf_physaddr + m->data_off);
+}
+
+/*
+ * Virtual to physical address: va minus m's (buf_addr - buf_physaddr).
+ */
+static void *
+va2pa(void *va, struct rte_kni_mbuf *m)
+{
+	unsigned long delta = (unsigned long)m->buf_addr -
+			(unsigned long)m->buf_physaddr;
+
+	return (void *)((unsigned long)va - delta);
+}
+
+/*
+ * Post req on req_q and wait up to 3 * HZ for the reply; returns 0 or -errno.
+ */
+static int
+kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
+{
+	int ret = -1;
+	void *resp_va;
+	uint32_t num;
+	int ret_val;
+
+	if (!kni || !req) {
+		pr_err("No kni instance or request\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&kni->sync_lock);	/* held across the whole exchange */
+
+	/* Copy req into sync_kva and enqueue sync_va on req_q */
+	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
+	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
+	if (num < 1) {
+		pr_err("Cannot send to req_q\n");
+		ret = -EBUSY;
+		goto fail;
+	}
+
+	ret_val = wait_event_interruptible_timeout(kni->wq,
+			kni_fifo_count(kni->resp_q), 3 * HZ);
+	if (signal_pending(current) || ret_val <= 0) {
+		ret = -ETIME;	/* NOTE(review): the request stays queued on req_q */
+		goto fail;
+	}
+	num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
+	if (num != 1 || resp_va != kni->sync_va) {
+		/* Unexpected: nothing dequeued, or not our sync_va */
+		pr_err("No data in resp_q\n");
+		ret = -ENODATA;
+		goto fail;
+	}
+
+	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));	/* reply overwrites req */
+	ret = 0;
+
+fail:
+	mutex_unlock(&kni->sync_lock);
+	return ret;
+}
+
 /*
  * Open and close
  */
@@ -116,18 +196,112 @@ kni_net_config(struct net_device *dev, struct ifmap *map)
 }
 
 /*
+ * Transmit a packet (called by the kernel)
+ */
+#ifdef RTE_KNI_VHOST
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	struct kni_dev *kni = netdev_priv(dev);
+
+	dev_kfree_skb(skb);	/* this variant frees every skb and counts it as dropped */
+	kni->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+#else
+static int
+kni_net_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	int len = 0;
+	uint32_t ret;
+	struct kni_dev *kni = netdev_priv(dev);
+	struct rte_kni_mbuf *pkt_kva = NULL;
+	void *pkt_pa = NULL;
+	void *pkt_va = NULL;
+
+	/* save the timestamp */
+#ifdef HAVE_TRANS_START_HELPER
+	netif_trans_update(dev);
+#else
+	dev->trans_start = jiffies;
+#endif
+
+	/* Drop the skb if it is longer than kni->mbuf_size */
+	if (skb->len > kni->mbuf_size)
+		goto drop;
+
+	/**
+	 * Check if it has at least one free entry in tx_q and
+	 * one entry in alloc_q.
+	 */
+	if (kni_fifo_free_count(kni->tx_q) == 0 ||
+			kni_fifo_count(kni->alloc_q) == 0) {
+		/**
+		 * If no free entry in tx_q or no entry in alloc_q,
+		 * drop the skb and return.
+		 */
+		goto drop;
+	}
+
+	/* dequeue one mbuf (as a physical address) from alloc_q */
+	ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
+	if (likely(ret == 1)) {
+		void *data_kva;
+
+		pkt_kva = pa2kva(pkt_pa);
+		data_kva = kva2data_kva(pkt_kva);
+		pkt_va = pa2va(pkt_pa, pkt_kva);
+
+		len = skb->len;
+		memcpy(data_kva, skb->data, len);
+		if (unlikely(len < ETH_ZLEN)) {	/* zero-pad to ETH_ZLEN */
+			memset(data_kva + len, 0, ETH_ZLEN - len);
+			len = ETH_ZLEN;
+		}
+		pkt_kva->pkt_len = len;
+		pkt_kva->data_len = len;
+
+		/* enqueue the mbuf's virtual address (pkt_va) into tx_q */
+		ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
+		if (unlikely(ret != 1)) {
+			/* Unexpected; NOTE(review): dequeued mbuf is lost */
+			pr_err("Fail to enqueue mbuf into tx_q\n");
+			goto drop;
+		}
+	} else {
+		/* Failing should not happen: alloc_q was checked above */
+		pr_err("Fail to dequeue mbuf from alloc_q\n");
+		goto drop;
+	}
+
+	/* Free skb and update statistics */
+	dev_kfree_skb(skb);
+	kni->stats.tx_bytes += len;
+	kni->stats.tx_packets++;
+
+	return NETDEV_TX_OK;
+
+drop:
+	/* Free skb and count the drop; NETDEV_TX_OK is still returned */
+	dev_kfree_skb(skb);
+	kni->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+}
+#endif
+
+/*
  * RX: normal working mode
  */
 static void
 kni_net_rx_normal(struct kni_dev *kni)
 {
-	unsigned ret;
+	uint32_t ret;
 	uint32_t len;
-	unsigned i, num_rx, num_fq;
+	uint32_t i, num_rx, num_fq;
 	struct rte_kni_mbuf *kva;
-	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-	void * data_kva;
-
+	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
 
@@ -139,24 +313,22 @@ kni_net_rx_normal(struct kni_dev *kni)
 	}
 
 	/* Calculate the number of entries to dequeue from rx_q */
-	num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ);
+	num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
 
 	/* Burst dequeue from rx_q */
-	num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx);
+	num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
 	if (num_rx == 0)
 		return;
 
 	/* Transfer received packets to netif */
 	for (i = 0; i < num_rx; i++) {
-		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+		kva = pa2kva(kni->pa[i]);
 		len = kva->pkt_len;
-
-		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va
-				+ kni->mbuf_kva;
+		data_kva = kva2data_kva(kva);
+		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = dev_alloc_skb(len + 2);
 		if (!skb) {
-			KNI_ERR("Out of mem, dropping pkts\n");
 			/* Update statistics */
 			kni->stats.rx_dropped++;
 			continue;
@@ -178,9 +350,8 @@ kni_net_rx_normal(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
-				data_kva = kva->buf_addr + kva->data_off
-					- kni->mbuf_va + kni->mbuf_kva;
+				kva = pa2kva(va2pa(kva->next, kva));
+				data_kva = kva2data_kva(kva);
 			}
 		}
 
@@ -197,10 +368,10 @@ kni_net_rx_normal(struct kni_dev *kni)
 	}
 
 	/* Burst enqueue mbufs into free_q */
-	ret = kni_fifo_put(kni->free_q, (void **)va, num_rx);
+	ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
 	if (ret != num_rx)
 		/* Failing should not happen */
-		KNI_ERR("Fail to enqueue entries into free_q\n");
+		pr_err("Fail to enqueue entries into free_q\n");
 }
 
 /*
@@ -209,15 +380,12 @@ kni_net_rx_normal(struct kni_dev *kni)
 static void
 kni_net_rx_lo_fifo(struct kni_dev *kni)
 {
-	unsigned ret;
+	uint32_t ret;
 	uint32_t len;
-	unsigned i, num, num_rq, num_tq, num_aq, num_fq;
+	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
 	struct rte_kni_mbuf *kva;
-	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-	void * data_kva;
-
+	void *data_kva;
 	struct rte_kni_mbuf *alloc_kva;
-	struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ];
 	void *alloc_data_kva;
 
 	/* Get the number of entries in rx_q */
@@ -236,33 +404,32 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 	num = min(num_rq, num_tq);
 	num = min(num, num_aq);
 	num = min(num, num_fq);
-	num = min(num, (unsigned)MBUF_BURST_SZ);
+	num = min_t(uint32_t, num, MBUF_BURST_SZ);
 
 	/* Return if no entry to dequeue from rx_q */
 	if (num == 0)
 		return;
 
 	/* Burst dequeue from rx_q */
-	ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
 	if (ret == 0)
 		return; /* Failing should not happen */
 
 	/* Dequeue entries from alloc_q */
-	ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num);
+	ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
 	if (ret) {
 		num = ret;
 		/* Copy mbufs */
 		for (i = 0; i < num; i++) {
-			kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+			kva = pa2kva(kni->pa[i]);
 			len = kva->pkt_len;
-			data_kva = kva->buf_addr + kva->data_off -
-					kni->mbuf_va + kni->mbuf_kva;
-
-			alloc_kva = (void *)alloc_va[i] - kni->mbuf_va +
-							kni->mbuf_kva;
-			alloc_data_kva = alloc_kva->buf_addr +
-					alloc_kva->data_off - kni->mbuf_va +
-							kni->mbuf_kva;
+			data_kva = kva2data_kva(kva);
+			kni->va[i] = pa2va(kni->pa[i], kva);
+
+			alloc_kva = pa2kva(kni->alloc_pa[i]);
+			alloc_data_kva = kva2data_kva(alloc_kva);
+			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
+
 			memcpy(alloc_data_kva, data_kva, len);
 			alloc_kva->pkt_len = len;
 			alloc_kva->data_len = len;
@@ -272,17 +439,17 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 		}
 
 		/* Burst enqueue mbufs into tx_q */
-		ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num);
+		ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
 		if (ret != num)
 			/* Failing should not happen */
-			KNI_ERR("Fail to enqueue mbufs into tx_q\n");
+			pr_err("Fail to enqueue mbufs into tx_q\n");
 	}
 
 	/* Burst enqueue mbufs into free_q */
-	ret = kni_fifo_put(kni->free_q, (void **)va, num);
+	ret = kni_fifo_put(kni->free_q, kni->va, num);
 	if (ret != num)
 		/* Failing should not happen */
-		KNI_ERR("Fail to enqueue mbufs into free_q\n");
+		pr_err("Fail to enqueue mbufs into free_q\n");
 
 	/**
 	 * Update statistic, and enqueue/dequeue failure is impossible,
@@ -298,13 +465,11 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
 static void
 kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 {
-	unsigned ret;
+	uint32_t ret;
 	uint32_t len;
-	unsigned i, num_rq, num_fq, num;
+	uint32_t i, num_rq, num_fq, num;
 	struct rte_kni_mbuf *kva;
-	struct rte_kni_mbuf *va[MBUF_BURST_SZ];
-	void * data_kva;
-
+	void *data_kva;
 	struct sk_buff *skb;
 	struct net_device *dev = kni->net_dev;
 
@@ -316,28 +481,26 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 
 	/* Calculate the number of entries to dequeue from rx_q */
 	num = min(num_rq, num_fq);
-	num = min(num, (unsigned)MBUF_BURST_SZ);
+	num = min_t(uint32_t, num, MBUF_BURST_SZ);
 
 	/* Return if no entry to dequeue from rx_q */
 	if (num == 0)
 		return;
 
 	/* Burst dequeue mbufs from rx_q */
-	ret = kni_fifo_get(kni->rx_q, (void **)va, num);
+	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
 	if (ret == 0)
 		return;
 
 	/* Copy mbufs to sk buffer and then call tx interface */
 	for (i = 0; i < num; i++) {
-		kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva;
+		kva = pa2kva(kni->pa[i]);
 		len = kva->pkt_len;
-		data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va +
-				kni->mbuf_kva;
+		data_kva = kva2data_kva(kva);
+		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = dev_alloc_skb(len + 2);
-		if (skb == NULL)
-			KNI_ERR("Out of mem, dropping pkts\n");
-		else {
+		if (skb) {
 			/* Align IP on 16B boundary */
 			skb_reserve(skb, 2);
 			memcpy(skb_put(skb, len), data_kva, len);
@@ -349,7 +512,6 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 		/* Simulate real usage, allocate/copy skb twice */
 		skb = dev_alloc_skb(len + 2);
 		if (skb == NULL) {
-			KNI_ERR("Out of mem, dropping pkts\n");
 			kni->stats.rx_dropped++;
 			continue;
 		}
@@ -370,9 +532,8 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 				if (!kva->next)
 					break;
 
-				kva = kva->next - kni->mbuf_va + kni->mbuf_kva;
-				data_kva = kva->buf_addr + kva->data_off
-					- kni->mbuf_va + kni->mbuf_kva;
+				kva = pa2kva(va2pa(kva->next, kva));
+				data_kva = kva2data_kva(kva);
 			}
 		}
 
@@ -387,10 +548,10 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 	}
 
 	/* enqueue all the mbufs from rx_q into free_q */
-	ret = kni_fifo_put(kni->free_q, (void **)&va, num);
+	ret = kni_fifo_put(kni->free_q, kni->va, num);
 	if (ret != num)
 		/* Failing should not happen */
-		KNI_ERR("Fail to enqueue mbufs into free_q\n");
+		pr_err("Fail to enqueue mbufs into free_q\n");
 }
 
 /* rx interface */
@@ -405,114 +566,18 @@ kni_net_rx(struct kni_dev *kni)
 }
 
 /*
- * Transmit a packet (called by the kernel)
- */
-#ifdef RTE_KNI_VHOST
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
-	struct kni_dev *kni = netdev_priv(dev);
-
-	dev_kfree_skb(skb);
-	kni->stats.tx_dropped++;
-
-	return NETDEV_TX_OK;
-}
-#else
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
-	int len = 0;
-	unsigned ret;
-	struct kni_dev *kni = netdev_priv(dev);
-	struct rte_kni_mbuf *pkt_kva = NULL;
-	struct rte_kni_mbuf *pkt_va = NULL;
-
-	/* save the timestamp */
-#ifdef HAVE_TRANS_START_HELPER
-	netif_trans_update(dev);
-#else
-	dev->trans_start = jiffies;
-#endif
-
-	/* Check if the length of skb is less than mbuf size */
-	if (skb->len > kni->mbuf_size)
-		goto drop;
-
-	/**
-	 * Check if it has at least one free entry in tx_q and
-	 * one entry in alloc_q.
-	 */
-	if (kni_fifo_free_count(kni->tx_q) == 0 ||
-			kni_fifo_count(kni->alloc_q) == 0) {
-		/**
-		 * If no free entry in tx_q or no entry in alloc_q,
-		 * drops skb and goes out.
-		 */
-		goto drop;
-	}
-
-	/* dequeue a mbuf from alloc_q */
-	ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
-	if (likely(ret == 1)) {
-		void *data_kva;
-
-		pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
-		data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va
-				+ kni->mbuf_kva;
-
-		len = skb->len;
-		memcpy(data_kva, skb->data, len);
-		if (unlikely(len < ETH_ZLEN)) {
-			memset(data_kva + len, 0, ETH_ZLEN - len);
-			len = ETH_ZLEN;
-		}
-		pkt_kva->pkt_len = len;
-		pkt_kva->data_len = len;
-
-		/* enqueue mbuf into tx_q */
-		ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
-		if (unlikely(ret != 1)) {
-			/* Failing should not happen */
-			KNI_ERR("Fail to enqueue mbuf into tx_q\n");
-			goto drop;
-		}
-	} else {
-		/* Failing should not happen */
-		KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
-		goto drop;
-	}
-
-	/* Free skb and update statistics */
-	dev_kfree_skb(skb);
-	kni->stats.tx_bytes += len;
-	kni->stats.tx_packets++;
-
-	return NETDEV_TX_OK;
-
-drop:
-	/* Free skb and update statistics */
-	dev_kfree_skb(skb);
-	kni->stats.tx_dropped++;
-
-	return NETDEV_TX_OK;
-}
-#endif
-
-/*
  * Deal with a transmit timeout.
  */
 static void
-kni_net_tx_timeout (struct net_device *dev)
+kni_net_tx_timeout(struct net_device *dev)
 {
 	struct kni_dev *kni = netdev_priv(dev);
 
-	KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies,
-			jiffies - dev->trans_start);
+	pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
+			jiffies - dev_trans_start(dev));
 
 	kni->stats.tx_errors++;
 	netif_wake_queue(dev);
-	return;
 }
 
 /*
@@ -521,8 +586,8 @@ kni_net_tx_timeout (struct net_device *dev)
 static int
 kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	KNI_DBG("kni_net_ioctl %d\n",
-		((struct kni_dev *)netdev_priv(dev))->group_id);
+	pr_debug("kni_net_ioctl group:%d cmd:%d\n",
+		((struct kni_dev *)netdev_priv(dev))->group_id, cmd);
 
 	return 0;
 }
@@ -539,7 +604,7 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu)
 	struct rte_kni_request req;
 	struct kni_dev *kni = netdev_priv(dev);
 
-	KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
+	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
 
 	memset(&req, 0, sizeof(req));
 	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
@@ -562,61 +627,13 @@ kni_net_poll_resp(struct kni_dev *kni)
 }
 
 /*
- * It can be called to process the request.
- */
-static int
-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
-{
-	int ret = -1;
-	void *resp_va;
-	unsigned num;
-	int ret_val;
-
-	if (!kni || !req) {
-		KNI_ERR("No kni instance or request\n");
-		return -EINVAL;
-	}
-
-	mutex_lock(&kni->sync_lock);
-
-	/* Construct data */
-	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
-	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
-	if (num < 1) {
-		KNI_ERR("Cannot send to req_q\n");
-		ret = -EBUSY;
-		goto fail;
-	}
-
-	ret_val = wait_event_interruptible_timeout(kni->wq,
-			kni_fifo_count(kni->resp_q), 3 * HZ);
-	if (signal_pending(current) || ret_val <= 0) {
-		ret = -ETIME;
-		goto fail;
-	}
-	num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
-	if (num != 1 || resp_va != kni->sync_va) {
-		/* This should never happen */
-		KNI_ERR("No data in resp_q\n");
-		ret = -ENODATA;
-		goto fail;
-	}
-
-	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
-	ret = 0;
-
-fail:
-	mutex_unlock(&kni->sync_lock);
-	return ret;
-}
-
-/*
  * Return statistics to the caller
  */
 static struct net_device_stats *
 kni_net_stats(struct net_device *dev)
 {
 	struct kni_dev *kni = netdev_priv(dev);
+
 	return &kni->stats;
 }
 
@@ -626,7 +643,7 @@ kni_net_stats(struct net_device *dev)
 static int
 kni_net_header(struct sk_buff *skb, struct net_device *dev,
 		unsigned short type, const void *daddr,
-		const void *saddr, unsigned int len)
+		const void *saddr, uint32_t len)
 {
 	struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
 
@@ -637,7 +654,6 @@ kni_net_header(struct sk_buff *skb, struct net_device *dev,
 	return dev->hard_header_len;
 }
 
-
 /*
  * Re-fill the eth header
  */
@@ -662,9 +678,11 @@ kni_net_rebuild_header(struct sk_buff *skb)
  *
  * Returns 0 on success, negative on failure
  **/
-static int kni_net_set_mac(struct net_device *netdev, void *p)
+static int
+kni_net_set_mac(struct net_device *netdev, void *p)
 {
 	struct sockaddr *addr = p;
+
 	if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
 		return -EADDRNOTAVAIL;
 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
@@ -672,7 +690,8 @@ static int kni_net_set_mac(struct net_device *netdev, void *p)
 }
 
 #ifdef HAVE_CHANGE_CARRIER_CB
-static int kni_net_change_carrier(struct net_device *dev, bool new_carrier)
+static int
+kni_net_change_carrier(struct net_device *dev, bool new_carrier)
 {
 	if (new_carrier)
 		netif_carrier_on(dev);
@@ -711,8 +730,6 @@ kni_net_init(struct net_device *dev)
 {
 	struct kni_dev *kni = netdev_priv(dev);
 
-	KNI_DBG("kni_net_init\n");
-
 	init_waitqueue_head(&kni->wq);
 	mutex_init(&kni->sync_lock);
 
@@ -726,18 +743,18 @@ void
 kni_net_config_lo_mode(char *lo_str)
 {
 	if (!lo_str) {
-		KNI_PRINT("loopback disabled");
+		pr_debug("loopback disabled");
 		return;
 	}
 
 	if (!strcmp(lo_str, "lo_mode_none"))
-		KNI_PRINT("loopback disabled");
+		pr_debug("loopback disabled");
 	else if (!strcmp(lo_str, "lo_mode_fifo")) {
-		KNI_PRINT("loopback mode=lo_mode_fifo enabled");
+		pr_debug("loopback mode=lo_mode_fifo enabled");
 		kni_net_rx_func = kni_net_rx_lo_fifo;
 	} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
-		KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled");
+		pr_debug("loopback mode=lo_mode_fifo_skb enabled");
 		kni_net_rx_func = kni_net_rx_lo_fifo_skb;
 	} else
-		KNI_PRINT("Incognizant parameter, loopback disabled");
+		pr_debug("Incognizant parameter, loopback disabled");
 }
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c
index a3ca8499..f54c34b1 100644
--- a/lib/librte_eal/linuxapp/kni/kni_vhost.c
+++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/if_tun.h>
 #include <linux/version.h>
+#include <linux/file.h>
 
 #include "compat.h"
 #include "kni_dev.h"
@@ -39,21 +40,12 @@
 
 #define RX_BURST_SZ 4
 
-extern void put_unused_fd(unsigned int fd);
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0)
-extern struct file*
-sock_alloc_file(struct socket *sock,
-		int flags, const char *dname);
-
-extern int get_unused_fd_flags(unsigned flags);
-
-extern void fd_install(unsigned int fd, struct file *file);
-
+#ifdef HAVE_STATIC_SOCK_MAP_FD
 static int kni_sock_map_fd(struct socket *sock)
 {
 	struct file *file;
 	int fd = get_unused_fd_flags(0);
+
 	if (fd < 0)
 		return fd;
 
@@ -65,8 +57,6 @@ static int kni_sock_map_fd(struct socket *sock)
 	fd_install(fd, file);
 	return fd;
 }
-#else
-#define kni_sock_map_fd(s)             sock_map_fd(s, 0)
 #endif
 
 static struct proto kni_raw_proto = {
@@ -77,13 +67,13 @@ static struct proto kni_raw_proto = {
 
 static inline int
 kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
-		 unsigned offset, unsigned len)
+		 uint32_t offset, uint32_t len)
 {
 	struct rte_kni_mbuf *pkt_kva = NULL;
 	struct rte_kni_mbuf *pkt_va = NULL;
 	int ret;
 
-	KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n",
+	pr_debug("tx offset=%d, len=%d, iovlen=%d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
 		   offset, len, (int)m->msg_iter.iov->iov_len);
 #else
@@ -110,7 +100,7 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
 
 		pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
 		data_kva = pkt_kva->buf_addr + pkt_kva->data_off
-		           - kni->mbuf_va + kni->mbuf_kva;
+			- kni->mbuf_va + kni->mbuf_kva;
 
 #ifdef HAVE_IOV_ITER_MSGHDR
 		copy_from_iter(data_kva, len, &m->msg_iter);
@@ -129,12 +119,12 @@ kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
 		ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
 		if (unlikely(ret != 1)) {
 			/* Failing should not happen */
-			KNI_ERR("Fail to enqueue mbuf into tx_q\n");
+			pr_err("Fail to enqueue mbuf into tx_q\n");
 			goto drop;
 		}
 	} else {
 		/* Failing should not happen */
-		KNI_ERR("Fail to dequeue mbuf from alloc_q\n");
+		pr_err("Fail to dequeue mbuf from alloc_q\n");
 		goto drop;
 	}
 
@@ -153,12 +143,12 @@ drop:
 
 static inline int
 kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
-		 unsigned offset, unsigned len)
+		 uint32_t offset, uint32_t len)
 {
 	uint32_t pkt_len;
 	struct rte_kni_mbuf *kva;
 	struct rte_kni_mbuf *va;
-	void * data_kva;
+	void *data_kva;
 	struct sk_buff *skb;
 	struct kni_vhost_queue *q = kni->vhost_queue;
 
@@ -173,19 +163,19 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
 	if (unlikely(skb == NULL))
 		return 0;
 
-	kva = (struct rte_kni_mbuf*)skb->data;
+	kva = (struct rte_kni_mbuf *)skb->data;
 
 	/* free skb to cache */
 	skb->data = NULL;
-	if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1)))
+	if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1))
 		/* Failing should not happen */
-		KNI_ERR("Fail to enqueue entries into rx cache fifo\n");
+		pr_err("Fail to enqueue entries into rx cache fifo\n");
 
 	pkt_len = kva->data_len;
 	if (unlikely(pkt_len > len))
 		goto drop;
 
-	KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
+	pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
 		   offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
 #else
@@ -205,12 +195,12 @@ kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
 	kni->stats.rx_packets++;
 
 	/* enqueue mbufs into free_q */
-	va = (void*)kva - kni->mbuf_kva + kni->mbuf_va;
-	if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1)))
+	va = (void *)kva - kni->mbuf_kva + kni->mbuf_va;
+	if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1))
 		/* Failing should not happen */
-		KNI_ERR("Fail to enqueue entries into free_q\n");
+		pr_err("Fail to enqueue entries into free_q\n");
 
-	KNI_DBG_RX("receive done %d\n", pkt_len);
+	pr_debug("receive done %d\n", pkt_len);
 
 	return pkt_len;
 
@@ -221,29 +211,25 @@ drop:
 	return 0;
 }
 
-static unsigned int
-kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
+static uint32_t
+kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct kni_vhost_queue *q =
 		container_of(sock->sk, struct kni_vhost_queue, sk);
 	struct kni_dev *kni;
-	unsigned int mask = 0;
+	uint32_t mask = 0;
 
 	if (unlikely(q == NULL || q->kni == NULL))
 		return POLLERR;
 
 	kni = q->kni;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
-	KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n",
+#ifdef HAVE_SOCKET_WQ
+	pr_debug("start kni_poll on group %d, wq 0x%16llx\n",
 		  kni->group_id, (uint64_t)sock->wq);
-#else
-	KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n",
-		  kni->group_id, (uint64_t)&sock->wait);
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
 	poll_wait(file, &sock->wq->wait, wait);
 #else
+	pr_debug("start kni_poll on group %d, wait at 0x%16llx\n",
+		  kni->group_id, (uint64_t)&sock->wait);
 	poll_wait(file, &sock->wait, wait);
 #endif
 
@@ -252,11 +238,12 @@ kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait)
 
 	if (sock_writeable(&q->sk) ||
 #ifdef SOCKWQ_ASYNC_NOSPACE
-	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+		(!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
+			sock_writeable(&q->sk)))
 #else
-	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+		(!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
+			sock_writeable(&q->sk)))
 #endif
-	     sock_writeable(&q->sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
 	return mask;
@@ -269,7 +256,7 @@ kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
 	struct rte_kni_mbuf *kva;
 
 	kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
-	(skb)->data = (unsigned char*)kva;
+	(skb)->data = (unsigned char *)kva;
 	(skb)->len = kva->data_len;
 	skb_queue_tail(&q->sk.sk_receive_queue, skb);
 }
@@ -279,6 +266,7 @@ kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
 	  struct sk_buff **skb, struct rte_kni_mbuf **va)
 {
 	int i;
+
 	for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
 		kni_vhost_enqueue(kni, q, *skb, *va);
 }
@@ -287,9 +275,9 @@ int
 kni_chk_vhost_rx(struct kni_dev *kni)
 {
 	struct kni_vhost_queue *q = kni->vhost_queue;
-	unsigned nb_in, nb_mbuf, nb_skb;
-	const unsigned BURST_MASK = RX_BURST_SZ - 1;
-	unsigned nb_burst, nb_backlog, i;
+	uint32_t nb_in, nb_mbuf, nb_skb;
+	const uint32_t BURST_MASK = RX_BURST_SZ - 1;
+	uint32_t nb_burst, nb_backlog, i;
 	struct sk_buff *skb[RX_BURST_SZ];
 	struct rte_kni_mbuf *va[RX_BURST_SZ];
 
@@ -305,20 +293,18 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 	nb_mbuf = kni_fifo_count(kni->rx_q);
 
 	nb_in = min(nb_mbuf, nb_skb);
-	nb_in = min(nb_in, (unsigned)RX_BURST_SZ);
+	nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ);
 	nb_burst   = (nb_in & ~BURST_MASK);
 	nb_backlog = (nb_in & BURST_MASK);
 
 	/* enqueue skb_queue per BURST_SIZE bulk */
-	if (0 != nb_burst) {
-		if (unlikely(RX_BURST_SZ != kni_fifo_get(
-				     kni->rx_q, (void **)&va,
-				     RX_BURST_SZ)))
+	if (nb_burst != 0) {
+		if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ)
+				!= RX_BURST_SZ))
 			goto except;
 
-		if (unlikely(RX_BURST_SZ != kni_fifo_get(
-				     q->fifo, (void **)&skb,
-				     RX_BURST_SZ)))
+		if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ)
+				!= RX_BURST_SZ))
 			goto except;
 
 		kni_vhost_enqueue_burst(kni, q, skb, va);
@@ -326,12 +312,10 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 
 	/* all leftover, do one by one */
 	for (i = 0; i < nb_backlog; ++i) {
-		if (unlikely(1 != kni_fifo_get(
-				     kni->rx_q,(void **)&va, 1)))
+		if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1))
 			goto except;
 
-		if (unlikely(1 != kni_fifo_get(
-				     q->fifo, (void **)&skb, 1)))
+		if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1))
 			goto except;
 
 		kni_vhost_enqueue(kni, q, *skb, *va);
@@ -342,7 +326,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 	    ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
 		wake_up_interruptible_poll(sk_sleep(&q->sk),
 				   POLLIN | POLLRDNORM | POLLRDBAND);
-		KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
+		pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
 			   nb_mbuf, nb_skb, nb_in);
 	}
 
@@ -350,7 +334,7 @@ kni_chk_vhost_rx(struct kni_dev *kni)
 
 except:
 	/* Failing should not happen */
-	KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n");
+	pr_err("Fail to enqueue fifo, it shouldn't happen\n");
 	BUG_ON(1);
 
 	return 0;
@@ -373,7 +357,7 @@ kni_sock_sndmsg(struct socket *sock,
 	if (unlikely(q == NULL || q->kni == NULL))
 		return 0;
 
-	KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
+	pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
 #ifdef HAVE_IOV_ITER_MSGHDR
 		   len, q->flags, (int)m->msg_iter.iov->iov_len);
 #else
@@ -420,13 +404,14 @@ kni_sock_rcvmsg(struct socket *sock,
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
 	if (likely(q->flags & IFF_VNET_HDR)) {
 		vnet_hdr_len = q->vnet_hdr_sz;
-		if ((len -= vnet_hdr_len) < 0)
+		len -= vnet_hdr_len;
+		if (len < 0)
 			return -EINVAL;
 	}
 #endif
 
-	if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni,
-		m, vnet_hdr_len, len))))
+	pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
+	if (unlikely(pkt_len == 0))
 		return 0;
 
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
@@ -440,7 +425,7 @@ kni_sock_rcvmsg(struct socket *sock,
 #endif /* HAVE_IOV_ITER_MSGHDR */
 		return -EFAULT;
 #endif /* RTE_KNI_VHOST_VNET_HDR_EN */
-	KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
+	pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
 		   (unsigned long)len, q->flags, pkt_len);
 
 	return pkt_len + vnet_hdr_len;
@@ -448,25 +433,24 @@ kni_sock_rcvmsg(struct socket *sock,
 
 /* dummy tap like ioctl */
 static int
-kni_sock_ioctl(struct socket *sock, unsigned int cmd,
-	      unsigned long arg)
+kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
 	struct ifreq __user *ifr = argp;
-	unsigned int __user *up = argp;
+	uint32_t __user *up = argp;
 	struct kni_vhost_queue *q =
 		container_of(sock->sk, struct kni_vhost_queue, sk);
 	struct kni_dev *kni;
-	unsigned int u;
+	uint32_t u;
 	int __user *sp = argp;
 	int s;
 	int ret;
 
-	KNI_DBG("tap ioctl cmd 0x%08x\n", cmd);
+	pr_debug("tap ioctl cmd 0x%08x\n", cmd);
 
 	switch (cmd) {
 	case TUNSETIFF:
-		KNI_DBG("TUNSETIFF\n");
+		pr_debug("TUNSETIFF\n");
 		/* ignore the name, just look at flags */
 		if (get_user(u, &ifr->ifr_flags))
 			return -EFAULT;
@@ -480,7 +464,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 		return ret;
 
 	case TUNGETIFF:
-		KNI_DBG("TUNGETIFF\n");
+		pr_debug("TUNGETIFF\n");
 		rcu_read_lock_bh();
 		kni = rcu_dereference_bh(q->kni);
 		if (kni)
@@ -491,14 +475,14 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 			return -ENOLINK;
 
 		ret = 0;
-		if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) ||
-		    put_user(q->flags, &ifr->ifr_flags))
+		if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ)
+				|| put_user(q->flags, &ifr->ifr_flags))
 			ret = -EFAULT;
 		dev_put(kni->net_dev);
 		return ret;
 
 	case TUNGETFEATURES:
-		KNI_DBG("TUNGETFEATURES\n");
+		pr_debug("TUNGETFEATURES\n");
 		u = IFF_TAP | IFF_NO_PI;
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
 		u |= IFF_VNET_HDR;
@@ -508,7 +492,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 		return 0;
 
 	case TUNSETSNDBUF:
-		KNI_DBG("TUNSETSNDBUF\n");
+		pr_debug("TUNSETSNDBUF\n");
 		if (get_user(u, up))
 			return -EFAULT;
 
@@ -519,7 +503,7 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 		s = q->vnet_hdr_sz;
 		if (put_user(s, sp))
 			return -EFAULT;
-		KNI_DBG("TUNGETVNETHDRSZ %d\n", s);
+		pr_debug("TUNGETVNETHDRSZ %d\n", s);
 		return 0;
 
 	case TUNSETVNETHDRSZ:
@@ -528,12 +512,12 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 		if (s < (int)sizeof(struct virtio_net_hdr))
 			return -EINVAL;
 
-		KNI_DBG("TUNSETVNETHDRSZ %d\n", s);
+		pr_debug("TUNSETVNETHDRSZ %d\n", s);
 		q->vnet_hdr_sz = s;
 		return 0;
 
 	case TUNSETOFFLOAD:
-		KNI_DBG("TUNSETOFFLOAD %lx\n", arg);
+		pr_debug("TUNSETOFFLOAD %lx\n", arg);
 #ifdef RTE_KNI_VHOST_VNET_HDR_EN
 		/* not support any offload yet */
 		if (!(q->flags & IFF_VNET_HDR))
@@ -545,26 +529,26 @@ kni_sock_ioctl(struct socket *sock, unsigned int cmd,
 #endif
 
 	default:
-		KNI_DBG("NOT SUPPORT\n");
+		pr_debug("NOT SUPPORT\n");
 		return -EINVAL;
 	}
 }
 
 static int
-kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd,
+kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd,
 		     unsigned long arg)
 {
 	/* 32 bits app on 64 bits OS to be supported later */
-	KNI_PRINT("Not implemented.\n");
+	pr_debug("Not implemented.\n");
 
 	return -EINVAL;
 }
 
 #define KNI_VHOST_WAIT_WQ_SAFE()                        \
-do {		                                	\
+do {							\
 	while ((BE_FINISH | BE_STOP) == kni->vq_status) \
-		msleep(1);                              \
-}while(0)                                               \
+		msleep(1);				\
+} while (0)						\
 
 
 static int
@@ -577,7 +561,8 @@ kni_sock_release(struct socket *sock)
 	if (q == NULL)
 		return 0;
 
-	if (NULL != (kni = q->kni)) {
+	kni = q->kni;
+	if (kni != NULL) {
 		kni->vq_status = BE_STOP;
 		KNI_VHOST_WAIT_WQ_SAFE();
 		kni->vhost_queue = NULL;
@@ -592,18 +577,17 @@ kni_sock_release(struct socket *sock)
 
 	sock_put(&q->sk);
 
-	KNI_DBG("dummy sock release done\n");
+	pr_debug("dummy sock release done\n");
 
 	return 0;
 }
 
 int
-kni_sock_getname (struct socket *sock,
-		  struct sockaddr *addr,
-		  int *sockaddr_len, int peer)
+kni_sock_getname(struct socket *sock, struct sockaddr *addr,
+		int *sockaddr_len, int peer)
 {
-	KNI_DBG("dummy sock getname\n");
-	((struct sockaddr_ll*)addr)->sll_family = AF_PACKET;
+	pr_debug("dummy sock getname\n");
+	((struct sockaddr_ll *)addr)->sll_family = AF_PACKET;
 	return 0;
 }
 
@@ -646,7 +630,7 @@ kni_sk_destruct(struct sock *sk)
 
 	/* make sure there's no packet in buffer */
 	while (skb_dequeue(&sk->sk_receive_queue) != NULL)
-	       ;
+		;
 
 	mb();
 
@@ -673,7 +657,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
 	if (kni->vhost_queue != NULL)
 		return -1;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
+#ifdef HAVE_SK_ALLOC_KERN_PARAM
 	q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
 			&kni_raw_proto, 0);
 #else
@@ -694,8 +678,9 @@ kni_vhost_backend_init(struct kni_dev *kni)
 	}
 
 	/* cache init */
-	q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
-			   GFP_KERNEL);
+	q->cache = kzalloc(
+		RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
+		GFP_KERNEL);
 	if (!q->cache)
 		goto free_fd;
 
@@ -708,7 +693,7 @@ kni_vhost_backend_init(struct kni_dev *kni)
 
 	for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
 		elem = &q->cache[i];
-		kni_fifo_put(fifo, (void**)&elem, 1);
+		kni_fifo_put(fifo, (void **)&elem, 1);
 	}
 	q->fifo = fifo;
 
@@ -738,14 +723,12 @@ kni_vhost_backend_init(struct kni_dev *kni)
 
 	kni->vq_status = BE_START;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
-	KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx,"
-		  "sk->sk_wq=0x%16llx",
+#ifdef HAVE_SOCKET_WQ
+	pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx",
 		  q->sockfd, (uint64_t)q->sock->wq,
 		  (uint64_t)q->sk.sk_wq);
 #else
-	KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx,"
-		  "sk->sk_sleep=0x%16llx",
+	pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx",
 		  q->sockfd, (uint64_t)&q->sock->wait,
 		  (uint64_t)q->sk.sk_sleep);
 #endif
@@ -768,7 +751,7 @@ free_sock:
 	q->sock = NULL;
 
 free_sk:
-	sk_free((struct sock*)q);
+	sk_free((struct sock *)q);
 
 	return err;
 }
@@ -781,6 +764,7 @@ show_sock_fd(struct device *dev, struct device_attribute *attr,
 	struct net_device *net_dev = container_of(dev, struct net_device, dev);
 	struct kni_dev *kni = netdev_priv(net_dev);
 	int sockfd = -1;
+
 	if (kni->vhost_queue != NULL)
 		sockfd = kni->vhost_queue->sockfd;
 	return snprintf(buf, 10, "%d\n", sockfd);
@@ -792,6 +776,7 @@ show_sock_en(struct device *dev, struct device_attribute *attr,
 {
 	struct net_device *net_dev = container_of(dev, struct net_device, dev);
 	struct kni_dev *kni = netdev_priv(net_dev);
+
 	return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
 }
 
@@ -804,7 +789,7 @@ set_sock_en(struct device *dev, struct device_attribute *attr,
 	unsigned long en;
 	int err = 0;
 
-	if (0 != kstrtoul(buf, 0, &en))
+	if (kstrtoul(buf, 0, &en) != 0)
 		return -EINVAL;
 
 	if (en)
@@ -818,7 +803,7 @@ static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
 static struct attribute *dev_attrs[] = {
 	&dev_attr_sock_fd.attr,
 	&dev_attr_sock_en.attr,
-        NULL,
+	NULL,
 };
 
 static const struct attribute_group dev_attr_grp = {
@@ -836,7 +821,7 @@ kni_vhost_backend_release(struct kni_dev *kni)
 	/* dettach from kni */
 	q->kni = NULL;
 
-	KNI_DBG("release backend done\n");
+	pr_debug("release backend done\n");
 
 	return 0;
 }
@@ -851,7 +836,7 @@ kni_vhost_init(struct kni_dev *kni)
 
 	kni->vq_status = BE_STOP;
 
-	KNI_DBG("kni_vhost_init done\n");
+	pr_debug("kni_vhost_init done\n");
 
 	return 0;
 }
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index 0bb5dc90..efe1e5fe 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -34,26 +34,25 @@ include $(RTE_SDK)/mk/rte.vars.mk
 #
 # library name
 #
-LIB = libethdev.a
+LIB = librte_ethdev.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
 EXPORT_MAP := rte_ether_version.map
 
-LIBABIVER := 4
+LIBABIVER := 5
 
 SRCS-y += rte_ethdev.c
 
 #
 # Export include files
 #
-SYMLINK-y-include += rte_ether.h
 SYMLINK-y-include += rte_ethdev.h
 SYMLINK-y-include += rte_eth_ctrl.h
 SYMLINK-y-include += rte_dev_info.h
 
 # this lib depends upon:
-DEPDIRS-y += lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
+DEPDIRS-y += lib/librte_net lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h
index 574683d3..aab6d1a6 100644
--- a/lib/librte_ether/rte_dev_info.h
+++ b/lib/librte_ether/rte_dev_info.h
@@ -34,6 +34,8 @@
 #ifndef _RTE_DEV_INFO_H_
 #define _RTE_DEV_INFO_H_
 
+#include <stdint.h>
+
 /*
  * Placeholder for accessing device registers
  */
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index c3a2c9e4..fe80eb01 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -34,6 +34,10 @@
 #ifndef _RTE_ETH_CTRL_H_
 #define _RTE_ETH_CTRL_H_
 
+#include <stdint.h>
+#include <rte_common.h>
+#include "rte_ether.h"
+
 /**
  * @file
  *
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index a5b42aa8..fde8112f 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -58,7 +58,6 @@
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_common.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
@@ -72,6 +71,7 @@
 static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
 struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
 static struct rte_eth_dev_data *rte_eth_dev_data;
+static uint8_t eth_dev_last_created_port;
 static uint8_t nb_ports;
 
 /* spinlock for eth device callbacks */
@@ -190,7 +190,7 @@ rte_eth_dev_find_free_port(void)
 }
 
 struct rte_eth_dev *
-rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
+rte_eth_dev_allocate(const char *name)
 {
 	uint8_t port_id;
 	struct rte_eth_dev *eth_dev;
@@ -215,25 +215,11 @@ rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type)
 	snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
 	eth_dev->data->port_id = port_id;
 	eth_dev->attached = DEV_ATTACHED;
-	eth_dev->dev_type = type;
+	eth_dev_last_created_port = port_id;
 	nb_ports++;
 	return eth_dev;
 }
 
-static int
-rte_eth_dev_create_unique_device_name(char *name, size_t size,
-		struct rte_pci_device *pci_dev)
-{
-	int ret;
-
-	ret = snprintf(name, size, "%d:%d.%d",
-			pci_dev->addr.bus, pci_dev->addr.devid,
-			pci_dev->addr.function);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 int
 rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
 {
@@ -245,9 +231,9 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
-static int
-rte_eth_dev_init(struct rte_pci_driver *pci_drv,
-		 struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+		      struct rte_pci_device *pci_dev)
 {
 	struct eth_driver    *eth_drv;
 	struct rte_eth_dev *eth_dev;
@@ -257,11 +243,10 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
 
 	eth_drv = (struct eth_driver *)pci_drv;
 
-	/* Create unique Ethernet device name using PCI address */
-	rte_eth_dev_create_unique_device_name(ethdev_name,
-			sizeof(ethdev_name), pci_dev);
+	rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+			sizeof(ethdev_name));
 
-	eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+	eth_dev = rte_eth_dev_allocate(ethdev_name);
 	if (eth_dev == NULL)
 		return -ENOMEM;
 
@@ -290,7 +275,7 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
 		return 0;
 
 	RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%x device_id=0x%x) failed\n",
-			pci_drv->name,
+			pci_drv->driver.name,
 			(unsigned) pci_dev->id.vendor_id,
 			(unsigned) pci_dev->id.device_id);
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -299,8 +284,8 @@ rte_eth_dev_init(struct rte_pci_driver *pci_drv,
 	return diag;
 }
 
-static int
-rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
+int
+rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev)
 {
 	const struct eth_driver *eth_drv;
 	struct rte_eth_dev *eth_dev;
@@ -310,9 +295,8 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	if (pci_dev == NULL)
 		return -EINVAL;
 
-	/* Create unique Ethernet device name using PCI address */
-	rte_eth_dev_create_unique_device_name(ethdev_name,
-			sizeof(ethdev_name), pci_dev);
+	rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
+			sizeof(ethdev_name));
 
 	eth_dev = rte_eth_dev_allocated(ethdev_name);
 	if (eth_dev == NULL)
@@ -340,28 +324,6 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
-/**
- * Register an Ethernet [Poll Mode] driver.
- *
- * Function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver.
- * Invokes the rte_eal_pci_register() function to register the *pci_drv*
- * structure embedded in the *eth_drv* structure, after having stored the
- * address of the rte_eth_dev_init() function in the *devinit* field of
- * the *pci_drv* structure.
- * During the PCI probing phase, the rte_eth_dev_init() function is
- * invoked for each PCI [Ethernet device] matching the embedded PCI
- * identifiers provided by the driver.
- */
-void
-rte_eth_driver_register(struct eth_driver *eth_drv)
-{
-	eth_drv->pci_drv.devinit = rte_eth_dev_init;
-	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-	rte_eal_pci_register(&eth_drv->pci_drv);
-}
-
 int
 rte_eth_dev_is_valid_port(uint8_t port_id)
 {
@@ -385,27 +347,6 @@ rte_eth_dev_count(void)
 	return nb_ports;
 }
 
-static enum rte_eth_dev_type
-rte_eth_dev_get_device_type(uint8_t port_id)
-{
-	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, RTE_ETH_DEV_UNKNOWN);
-	return rte_eth_devices[port_id].dev_type;
-}
-
-static int
-rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr)
-{
-	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-	if (addr == NULL) {
-		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
-		return -EINVAL;
-	}
-
-	*addr = rte_eth_devices[port_id].pci_dev->addr;
-	return 0;
-}
-
 int
 rte_eth_dev_get_name_by_port(uint8_t port_id, char *name)
 {
@@ -451,34 +392,6 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id)
 }
 
 static int
-rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id)
-{
-	int i;
-	struct rte_pci_device *pci_dev = NULL;
-
-	if (addr == NULL) {
-		RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
-		return -EINVAL;
-	}
-
-	*port_id = RTE_MAX_ETHPORTS;
-
-	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
-
-		pci_dev = rte_eth_devices[i].pci_dev;
-
-		if (pci_dev &&
-			!rte_eal_compare_pci_addr(&pci_dev->addr, addr)) {
-
-			*port_id = i;
-
-			return 0;
-		}
-	}
-	return -ENODEV;
-}
-
-static int
 rte_eth_dev_is_detachable(uint8_t port_id)
 {
 	uint32_t dev_flags;
@@ -503,127 +416,49 @@ rte_eth_dev_is_detachable(uint8_t port_id)
 		return 1;
 }
 
-/* attach the new physical device, then store port_id of the device */
-static int
-rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id)
+/* attach the new device, then store port_id of the device */
+int
+rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
 {
-	/* re-construct pci_device_list */
-	if (rte_eal_pci_scan())
-		goto err;
-	/* Invoke probe func of the driver can handle the new device. */
-	if (rte_eal_pci_probe_one(addr))
-		goto err;
+	int ret = -1;
+	int current = rte_eth_dev_count();
+	char *name = NULL;
+	char *args = NULL;
 
-	if (rte_eth_dev_get_port_by_addr(addr, port_id))
+	if ((devargs == NULL) || (port_id == NULL)) {
+		ret = -EINVAL;
 		goto err;
+	}
 
-	return 0;
-err:
-	return -1;
-}
-
-/* detach the new physical device, then store pci_addr of the device */
-static int
-rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr)
-{
-	struct rte_pci_addr freed_addr;
-	struct rte_pci_addr vp;
-
-	/* get pci address by port id */
-	if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr))
+	/* parse devargs, then retrieve device name and args */
+	if (rte_eal_parse_devargs_str(devargs, &name, &args))
 		goto err;
 
-	/* Zeroed pci addr means the port comes from virtual device */
-	vp.domain = vp.bus = vp.devid = vp.function = 0;
-	if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0)
+	ret = rte_eal_dev_attach(name, args);
+	if (ret < 0)
 		goto err;
 
-	/* invoke devuninit func of the pci driver,
-	 * also remove the device from pci_device_list */
-	if (rte_eal_pci_detach(&freed_addr))
+	/* no point looking at the port count if no port exists */
+	if (!rte_eth_dev_count()) {
+		RTE_LOG(ERR, EAL, "No port found for device (%s)\n", name);
+		ret = -1;
 		goto err;
+	}
 
-	*addr = freed_addr;
-	return 0;
-err:
-	return -1;
-}
-
-/* attach the new virtual device, then store port_id of the device */
-static int
-rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id)
-{
-	char *name = NULL, *args = NULL;
-	int ret = -1;
-
-	/* parse vdevargs, then retrieve device name and args */
-	if (rte_eal_parse_devargs_str(vdevargs, &name, &args))
-		goto end;
-
-	/* walk around dev_driver_list to find the driver of the device,
-	 * then invoke probe function of the driver.
-	 * rte_eal_vdev_init() updates port_id allocated after
-	 * initialization.
+	/* if nothing happened, there is a bug here, since some driver told us
+	 * it did attach a device, but did not create a port.
 	 */
-	if (rte_eal_vdev_init(name, args))
-		goto end;
-
-	if (rte_eth_dev_get_port_by_name(name, port_id))
-		goto end;
-
-	ret = 0;
-end:
-	free(name);
-	free(args);
-
-	return ret;
-}
-
-/* detach the new virtual device, then store the name of the device */
-static int
-rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname)
-{
-	char name[RTE_ETH_NAME_MAX_LEN];
-
-	/* get device name by port id */
-	if (rte_eth_dev_get_name_by_port(port_id, name))
-		goto err;
-	/* walk around dev_driver_list to find the driver of the device,
-	 * then invoke uninit function of the driver */
-	if (rte_eal_vdev_uninit(name))
-		goto err;
-
-	strncpy(vdevname, name, sizeof(name));
-	return 0;
-err:
-	return -1;
-}
-
-/* attach the new device, then store port_id of the device */
-int
-rte_eth_dev_attach(const char *devargs, uint8_t *port_id)
-{
-	struct rte_pci_addr addr;
-	int ret = -1;
-
-	if ((devargs == NULL) || (port_id == NULL)) {
-		ret = -EINVAL;
+	if (current == rte_eth_dev_count()) {
+		ret = -1;
 		goto err;
 	}
 
-	if (eal_parse_pci_DomBDF(devargs, &addr) == 0) {
-		ret = rte_eth_dev_attach_pdev(&addr, port_id);
-		if (ret < 0)
-			goto err;
-	} else {
-		ret = rte_eth_dev_attach_vdev(devargs, port_id);
-		if (ret < 0)
-			goto err;
-	}
+	*port_id = eth_dev_last_created_port;
+	ret = 0;
 
-	return 0;
 err:
-	RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n");
+	free(name);
+	free(args);
 	return ret;
 }
 
@@ -631,7 +466,6 @@ err:
 int
 rte_eth_dev_detach(uint8_t port_id, char *name)
 {
-	struct rte_pci_addr addr;
 	int ret = -1;
 
 	if (name == NULL) {
@@ -639,33 +473,19 @@ rte_eth_dev_detach(uint8_t port_id, char *name)
 		goto err;
 	}
 
-	/* check whether the driver supports detach feature, or not */
+	/* FIXME: move this to eal, once device flags are relocated there */
 	if (rte_eth_dev_is_detachable(port_id))
 		goto err;
 
-	if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) {
-		ret = rte_eth_dev_get_addr_by_port(port_id, &addr);
-		if (ret < 0)
-			goto err;
-
-		ret = rte_eth_dev_detach_pdev(port_id, &addr);
-		if (ret < 0)
-			goto err;
-
-		snprintf(name, RTE_ETH_NAME_MAX_LEN,
-			"%04x:%02x:%02x.%d",
-			addr.domain, addr.bus,
-			addr.devid, addr.function);
-	} else {
-		ret = rte_eth_dev_detach_vdev(port_id, name);
-		if (ret < 0)
-			goto err;
-	}
+	snprintf(name, sizeof(rte_eth_devices[port_id].data->name),
+		 "%s", rte_eth_devices[port_id].data->name);
+	ret = rte_eal_dev_detach(name);
+	if (ret < 0)
+		goto err;
 
 	return 0;
 
 err:
-	RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n");
 	return ret;
 }
 
@@ -2689,7 +2509,7 @@ rte_eth_dev_callback_unregister(uint8_t port_id,
 
 void
 _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
-	enum rte_eth_event_type event)
+	enum rte_eth_event_type event, void *cb_arg)
 {
 	struct rte_eth_dev_callback *cb_lst;
 	struct rte_eth_dev_callback dev_cb;
@@ -2700,6 +2520,9 @@ _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
 			continue;
 		dev_cb = *cb_lst;
 		cb_lst->active = 1;
+		if (cb_arg != NULL)
+			dev_cb.cb_arg = (void *) cb_arg;
+
 		rte_spinlock_unlock(&rte_eth_dev_cb_lock);
 		dev_cb.cb_fn(dev->data->port_id, dev_cb.event,
 						dev_cb.cb_arg);
@@ -2749,7 +2572,7 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
 	const struct rte_memzone *mz;
 
 	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-		 dev->driver->pci_drv.name, ring_name,
+		 dev->driver->pci_drv.driver.name, ring_name,
 		 dev->data->port_id, queue_id);
 
 	mz = rte_memzone_lookup(z_name);
@@ -3390,8 +3213,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_de
 		eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
 
 	eth_dev->data->kdrv = pci_dev->kdrv;
-	eth_dev->data->numa_node = pci_dev->numa_node;
-	eth_dev->data->drv_name = pci_dev->driver->name;
+	eth_dev->data->numa_node = pci_dev->device.numa_node;
+	eth_dev->data->drv_name = pci_dev->driver->driver.name;
 }
 
 int
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b0fe0334..96781792 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -190,6 +190,9 @@ struct rte_mbuf;
 
 /**
  * A structure used to retrieve statistics for an Ethernet port.
+ * Not all statistics fields in struct rte_eth_stats are supported
+ * by every type of network interface card (NIC). If a statistics
+ * field is not supported, its value is 0.
  */
 struct rte_eth_stats {
 	uint64_t ipackets;  /**< Total number of successfully received packets. */
@@ -198,7 +201,7 @@ struct rte_eth_stats {
 	uint64_t obytes;    /**< Total number of successfully transmitted bytes. */
 	uint64_t imissed;
 	/**< Total of RX packets dropped by the HW,
-	 * because there are no available mbufs (i.e. RX queues are full).
+	 * because there are no available buffer (i.e. RX queues are full).
 	 */
 	uint64_t ierrors;   /**< Total number of erroneous received packets. */
 	uint64_t oerrors;   /**< Total number of failed transmitted packets. */
@@ -255,6 +258,7 @@ struct rte_eth_stats {
 /**
  * A structure used to retrieve link-level information of an Ethernet port.
  */
+__extension__
 struct rte_eth_link {
 	uint32_t link_speed;        /**< ETH_SPEED_NUM_ */
 	uint16_t link_duplex  : 1;  /**< ETH_LINK_[HALF/FULL]_DUPLEX */
@@ -346,6 +350,7 @@ struct rte_eth_rxmode {
 	enum rte_eth_rx_mq_mode mq_mode;
 	uint32_t max_rx_pkt_len;  /**< Only used if jumbo_frame enabled. */
 	uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/
+	__extension__
 	uint16_t header_split : 1, /**< Header Split enable. */
 		hw_ip_checksum   : 1, /**< IP/UDP/TCP checksum offload enable. */
 		hw_vlan_filter   : 1, /**< VLAN filter enable. */
@@ -645,6 +650,7 @@ struct rte_eth_txmode {
 
 	/* For i40e specifically */
 	uint16_t pvid;
+	__extension__
 	uint8_t hw_vlan_reject_tagged : 1,
 		/**< If set, reject sending out tagged pkts */
 		hw_vlan_reject_untagged : 1,
@@ -864,6 +870,10 @@ struct rte_eth_conf {
 #define DEV_TX_OFFLOAD_UDP_TSO     0x00000040
 #define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */
 #define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100
+#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO    0x00000200    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GRE_TNL_TSO      0x00000400    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_IPIP_TNL_TSO     0x00000800    /**< Used for tunneling packet. */
+#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO   0x00001000    /**< Used for tunneling packet. */
 
 /**
  * Ethernet device information
@@ -1603,17 +1613,6 @@ struct rte_eth_rxtx_callback {
 };
 
 /**
- * The eth device type.
- */
-enum rte_eth_dev_type {
-	RTE_ETH_DEV_UNKNOWN,	/**< unknown device type */
-	RTE_ETH_DEV_PCI,
-		/**< Physical function and Virtual function of PCI devices */
-	RTE_ETH_DEV_VIRTUAL,	/**< non hardware device */
-	RTE_ETH_DEV_MAX		/**< max value of this enum */
-};
-
-/**
  * @internal
  * The generic data structure associated with each ethernet device.
  *
@@ -1643,7 +1642,6 @@ struct rte_eth_dev {
 	 */
 	struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT];
 	uint8_t attached; /**< Flag indicating the port is attached */
-	enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */
 } __rte_cache_aligned;
 
 struct rte_eth_dev_sriov {
@@ -1691,6 +1689,7 @@ struct rte_eth_dev_data {
 	struct ether_addr* hash_mac_addrs;
 	/** Device Ethernet MAC addresses of hash filtering. */
 	uint8_t port_id;           /**< Device [external] port identifier. */
+	__extension__
 	uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
 		scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
 		all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */
@@ -1756,8 +1755,7 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
  * @return
  *   - Slot in the rte_dev_devices array for a new device;
  */
-struct rte_eth_dev *rte_eth_dev_allocate(const char *name,
-		enum rte_eth_dev_type type);
+struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
 
 /**
  * @internal
@@ -1776,7 +1774,7 @@ int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev);
  * @param devargs
  *  A pointer to a strings array describing the new device
  *  to be attached. The strings should be a pci address like
- *  '0000:01:00.0' or virtual device name like 'eth_pcap0'.
+ *  '0000:01:00.0' or virtual device name like 'net_pcap0'.
  * @param port_id
  *  A pointer to a port identifier actually attached.
  * @return
@@ -1871,18 +1869,6 @@ struct eth_driver {
 };
 
 /**
- * @internal
- * A function invoked by the initialization function of an Ethernet driver
- * to simultaneously register itself as a PCI driver and as an Ethernet
- * Poll Mode Driver (PMD).
- *
- * @param eth_drv
- *   The pointer to the *eth_driver* structure associated with
- *   the Ethernet driver.
- */
-void rte_eth_driver_register(struct eth_driver *eth_drv);
-
-/**
  * Convert a numerical speed in Mbps to a bitmap flag that can be used in
  * the bitmap link_speeds of the struct rte_eth_conf
  *
@@ -3047,6 +3033,7 @@ enum rte_eth_event_type {
 				/**< queue state event (enabled/disabled) */
 	RTE_ETH_EVENT_INTR_RESET,
 			/**< reset interrupt event, sent to VF on PF reset */
+	RTE_ETH_EVENT_VF_MBOX,  /**< message from the VF received by PF */
 	RTE_ETH_EVENT_MAX       /**< max value of this enum */
 };
 
@@ -3068,6 +3055,11 @@ typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \
  * @param cb_arg
  *  Pointer to the parameters for the registered callback.
  *
+ *  The user data is overwritten in the case of RTE_ETH_EVENT_VF_MBOX.
+ *	This event occurs when a message from the VF is received by the PF.
+ *	The user data is overwritten with struct rte_pmd_ixgbe_mb_event_param.
+ *	This struct is defined in rte_pmd_ixgbe.h.
+ *
  * @return
  *  - On success, zero.
  *  - On failure, a negative value.
@@ -3106,12 +3098,16 @@ int rte_eth_dev_callback_unregister(uint8_t port_id,
  *  Pointer to struct rte_eth_dev.
  * @param event
  *  Eth device interrupt event type.
+ * @param cb_arg
+ *  Update callback parameter to pass data back to user application.
+ *  This allows the user application to decide if a particular function
+ *  is permitted or not.
  *
  * @return
  *  void
  */
 void _rte_eth_dev_callback_process(struct rte_eth_dev *dev,
-				enum rte_eth_event_type event);
+				enum rte_eth_event_type event, void *cb_arg);
 
 /**
  * When there is no rx packet coming in Rx Queue for a long time, we can
@@ -4341,7 +4337,7 @@ rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id,
 
 /**
 * Get the port id from pci adrress or device name
-* Ex: 0000:2:00.0 or vdev name eth_pcap0
+* Ex: 0000:2:00.0 or vdev name net_pcap0
 *
 * @param name
 *  pci address or name of the device
@@ -4368,6 +4364,21 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id);
 int
 rte_eth_dev_get_name_by_port(uint8_t port_id, char *name);
 
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .probe function to attach to an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_probe(struct rte_pci_driver *pci_drv,
+			  struct rte_pci_device *pci_dev);
+
+/**
+ * @internal
+ * Wrapper for use by pci drivers as a .remove function to detach an ethdev
+ * interface.
+ */
+int rte_eth_dev_pci_remove(struct rte_pci_device *pci_dev);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 45ddf44c..72be66d8 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -78,7 +78,6 @@ DPDK_2.2 {
 	rte_eth_dev_vlan_filter;
 	rte_eth_dev_wd_timeout_store;
 	rte_eth_dma_zone_reserve;
-	rte_eth_driver_register;
 	rte_eth_led_off;
 	rte_eth_led_on;
 	rte_eth_link;
@@ -138,4 +137,13 @@ DPDK_16.07 {
 	rte_eth_dev_get_name_by_port;
 	rte_eth_dev_get_port_by_name;
 	rte_eth_xstats_get_names;
+
 } DPDK_16.04;
+
+DPDK_16.11 {
+	global:
+
+	rte_eth_dev_pci_probe;
+	rte_eth_dev_pci_remove;
+
+} DPDK_16.07;
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index d6e68c68..51db006a 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -98,6 +98,7 @@ rte_hash_find_existing(const char *name)
 
 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
 {
+	h->cmp_jump_table_idx = KEY_CUSTOM;
 	h->rte_hash_custom_cmp_eq = func;
 }
 
@@ -283,6 +284,15 @@ rte_hash_create(const struct rte_hash_parameters *params)
 	h->free_slots = r;
 	h->hw_trans_mem_support = hw_trans_mem_support;
 
+#if defined(RTE_ARCH_X86)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+		h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
+	else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
+	else
+#endif
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
+
 	/* Turn on multi-writer only with explicit flat from user and TM
 	 * support.
 	 */
@@ -421,10 +431,10 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
 	 */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 		/* Search for space in alternative locations */
-		next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask;
+		next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
 		next_bkt[i] = &h->buckets[next_bucket_idx];
 		for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
-			if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE)
+			if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
 				break;
 		}
 
@@ -434,8 +444,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
 
 	/* Alternative location has spare room (end of recursive function) */
 	if (i != RTE_HASH_BUCKET_ENTRIES) {
-		next_bkt[i]->signatures[j].alt = bkt->signatures[i].current;
-		next_bkt[i]->signatures[j].current = bkt->signatures[i].alt;
+		next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
+		next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
 		next_bkt[i]->key_idx[j] = bkt->key_idx[i];
 		return i;
 	}
@@ -464,8 +474,8 @@ make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt)
 	bkt->flag[i] = 0;
 	nr_pushes = 0;
 	if (ret >= 0) {
-		next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current;
-		next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt;
+		next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
+		next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
 		next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
 		return i;
 	} else
@@ -547,8 +557,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is already inserted in primary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (prim_bkt->signatures[i].current == sig &&
-				prim_bkt->signatures[i].alt == alt_hash) {
+		if (prim_bkt->sig_current[i] == sig &&
+				prim_bkt->sig_alt[i] == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					prim_bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -567,8 +577,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is already inserted in secondary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (sec_bkt->signatures[i].alt == sig &&
-				sec_bkt->signatures[i].current == alt_hash) {
+		if (sec_bkt->sig_alt[i] == sig &&
+				sec_bkt->sig_current[i] == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					sec_bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -613,9 +623,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 #endif
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 			/* Check if slot is available */
-			if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-				prim_bkt->signatures[i].current = sig;
-				prim_bkt->signatures[i].alt = alt_hash;
+			if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+				prim_bkt->sig_current[i] = sig;
+				prim_bkt->sig_alt[i] = alt_hash;
 				prim_bkt->key_idx[i] = new_idx;
 				break;
 			}
@@ -635,8 +645,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 		 */
 		ret = make_space_bucket(h, prim_bkt);
 		if (ret >= 0) {
-			prim_bkt->signatures[ret].current = sig;
-			prim_bkt->signatures[ret].alt = alt_hash;
+			prim_bkt->sig_current[ret] = sig;
+			prim_bkt->sig_alt[ret] = alt_hash;
 			prim_bkt->key_idx[ret] = new_idx;
 			if (h->add_key == ADD_KEY_MULTIWRITER)
 				rte_spinlock_unlock(h->multiwriter_lock);
@@ -710,8 +720,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is in primary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (bkt->signatures[i].current == sig &&
-				bkt->signatures[i].sig != NULL_SIGNATURE) {
+		if (bkt->sig_current[i] == sig &&
+				bkt->key_idx[i] != EMPTY_SLOT) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -733,8 +743,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is in secondary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (bkt->signatures[i].current == alt_hash &&
-				bkt->signatures[i].alt == sig) {
+		if (bkt->sig_current[i] == alt_hash &&
+				bkt->sig_alt[i] == sig) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -788,7 +798,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
 	unsigned lcore_id, n_slots;
 	struct lcore_cache *cached_free_slots;
 
-	bkt->signatures[i].sig = NULL_SIGNATURE;
+	bkt->sig_current[i] = NULL_SIGNATURE;
+	bkt->sig_alt[i] = NULL_SIGNATURE;
 	if (h->hw_trans_mem_support) {
 		lcore_id = rte_lcore_id();
 		cached_free_slots = &h->local_free_slots[lcore_id];
@@ -826,8 +837,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is in primary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (bkt->signatures[i].current == sig &&
-				bkt->signatures[i].sig != NULL_SIGNATURE) {
+		if (bkt->sig_current[i] == sig &&
+				bkt->key_idx[i] != EMPTY_SLOT) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -838,7 +849,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				 * substracting the first dummy index
 				 */
 				ret = bkt->key_idx[i] - 1;
-				bkt->key_idx[i] = 0;
+				bkt->key_idx[i] = EMPTY_SLOT;
 				return ret;
 			}
 		}
@@ -851,8 +862,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 
 	/* Check if key is in secondary location */
 	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		if (bkt->signatures[i].current == alt_hash &&
-				bkt->signatures[i].sig != NULL_SIGNATURE) {
+		if (bkt->sig_current[i] == alt_hash &&
+				bkt->key_idx[i] != EMPTY_SLOT) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
 			if (rte_hash_cmp_eq(key, k->key, h) == 0) {
@@ -863,7 +874,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				 * substracting the first dummy index
 				 */
 				ret = bkt->key_idx[i] - 1;
-				bkt->key_idx[i] = 0;
+				bkt->key_idx[i] = EMPTY_SLOT;
 				return ret;
 			}
 		}
@@ -907,280 +918,189 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
 	return 0;
 }
 
-/* Lookup bulk stage 0: Prefetch input key */
 static inline void
-lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
-		const void * const *keys)
+compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
+			const struct rte_hash_bucket *prim_bkt,
+			const struct rte_hash_bucket *sec_bkt,
+			hash_sig_t prim_hash, hash_sig_t sec_hash,
+			enum rte_hash_sig_compare_function sig_cmp_fn)
 {
-	*idx = __builtin_ctzl(*lookup_mask);
-	if (*lookup_mask == 0)
-		*idx = 0;
+	unsigned int i;
+
+	switch (sig_cmp_fn) {
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+	case RTE_HASH_COMPARE_AVX2:
+		*prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+				_mm256_load_si256(
+					(__m256i const *)prim_bkt->sig_current),
+				_mm256_set1_epi32(prim_hash)));
+		*sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
+				_mm256_load_si256(
+					(__m256i const *)sec_bkt->sig_current),
+				_mm256_set1_epi32(sec_hash)));
+		break;
+#endif
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
+	case RTE_HASH_COMPARE_SSE:
+		/* Primary bucket: entries 0-3 -> bits 0-3, 4-7 -> bits 4-7 */
+		*prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+				_mm_load_si128(
+					(__m128i const *)prim_bkt->sig_current),
+				_mm_set1_epi32(prim_hash)));
+		*prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+				_mm_load_si128(
+					(__m128i const *)&prim_bkt->sig_current[4]),
+				_mm_set1_epi32(prim_hash)))) << 4;
+		/* Secondary bucket: same scheme, full 32-bit lane compares */
+		*sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+				_mm_load_si128(
+					(__m128i const *)sec_bkt->sig_current),
+				_mm_set1_epi32(sec_hash)));
+		*sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi32(
+				_mm_load_si128(
+					(__m128i const *)&sec_bkt->sig_current[4]),
+				_mm_set1_epi32(sec_hash)))) << 4;
+		break;
+#endif
+	default:
+		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+			*prim_hash_matches |=
+				((prim_hash == prim_bkt->sig_current[i]) << i);
+			*sec_hash_matches |=
+				((sec_hash == sec_bkt->sig_current[i]) << i);
+		}
+	}
 
-	rte_prefetch0(keys[*idx]);
-	*lookup_mask &= ~(1llu << *idx);
 }
 
-/*
- * Lookup bulk stage 1: Calculate primary/secondary hashes
- * and prefetch primary/secondary buckets
- */
+#define PREFETCH_OFFSET 4
 static inline void
-lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash,
-		const struct rte_hash_bucket **primary_bkt,
-		const struct rte_hash_bucket **secondary_bkt,
-		hash_sig_t *hash_vals, const void * const *keys,
-		const struct rte_hash *h)
+__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
+			int32_t num_keys, int32_t *positions,
+			uint64_t *hit_mask, void *data[])
 {
-	*prim_hash = rte_hash_hash(h, keys[idx]);
-	hash_vals[idx] = *prim_hash;
-	*sec_hash = rte_hash_secondary_hash(*prim_hash);
+	uint64_t hits = 0;
+	int32_t i;
+	uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
+	const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+	const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
+	uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+	uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+
+	/* Prefetch first keys */
+	for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
+		rte_prefetch0(keys[i]);
 
-	*primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask];
-	*secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask];
+	/*
+	 * Prefetch rest of the keys, calculate primary and
+	 * secondary bucket and prefetch them
+	 */
+	for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
+		rte_prefetch0(keys[i + PREFETCH_OFFSET]);
 
-	rte_prefetch0(*primary_bkt);
-	rte_prefetch0(*secondary_bkt);
-}
+		prim_hash[i] = rte_hash_hash(h, keys[i]);
+		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-/*
- * Lookup bulk stage 2:  Search for match hashes in primary/secondary locations
- * and prefetch first key slot
- */
-static inline void
-lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash,
-		const struct rte_hash_bucket *prim_bkt,
-		const struct rte_hash_bucket *sec_bkt,
-		const struct rte_hash_key **key_slot, int32_t *positions,
-		uint64_t *extra_hits_mask, const void *keys,
-		const struct rte_hash *h)
-{
-	unsigned prim_hash_matches, sec_hash_matches, key_idx, i;
-	unsigned total_hash_matches;
+		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
 
-	prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-	sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
-	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-		prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i);
-		sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i);
+		rte_prefetch0(primary_bkt[i]);
+		rte_prefetch0(secondary_bkt[i]);
 	}
 
-	key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)];
-	if (key_idx == 0)
-		key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)];
+	/* Calculate and prefetch rest of the buckets */
+	for (; i < num_keys; i++) {
+		prim_hash[i] = rte_hash_hash(h, keys[i]);
+		sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-	total_hash_matches = (prim_hash_matches |
-				(sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1)));
-	*key_slot = (const struct rte_hash_key *) ((const char *)keys +
-					key_idx * h->key_entry_size);
+		primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
+		secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
 
-	rte_prefetch0(*key_slot);
-	/*
-	 * Return index where key is stored,
-	 * substracting the first dummy index
-	 */
-	positions[idx] = (key_idx - 1);
+		rte_prefetch0(primary_bkt[i]);
+		rte_prefetch0(secondary_bkt[i]);
+	}
 
-	*extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx;
+	/* Compare signatures and prefetch key slot of first hit */
+	for (i = 0; i < num_keys; i++) {
+		compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+				primary_bkt[i], secondary_bkt[i],
+				prim_hash[i], sec_hash[i], h->sig_cmp_fn);
+
+		if (prim_hitmask[i]) {
+			uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
+			uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			rte_prefetch0(key_slot);
+			continue;
+		}
 
-}
+		if (sec_hitmask[i]) {
+			uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
+			uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			rte_prefetch0(key_slot);
+		}
+	}
 
+	/* Compare keys, first hits in primary first */
+	for (i = 0; i < num_keys; i++) {
+		positions[i] = -ENOENT;
+		while (prim_hitmask[i]) {
+			uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
+
+			uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			/*
+			 * Key index 0 is the dummy slot: the key compare still
+			 * runs (bitwise AND), but it can never count as a hit
+			 */
+			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+				if (data != NULL)
+					data[i] = key_slot->pdata;
 
-/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */
-static inline void
-lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys,
-		const int32_t *positions, void *data[], uint64_t *hits,
-		const struct rte_hash *h)
-{
-	unsigned hit;
-	unsigned key_idx;
+				hits |= 1ULL << i;
+				positions[i] = key_idx - 1;
+				goto next_key;
+			}
+			prim_hitmask[i] &= ~(1 << (hit_index));
+		}
 
-	hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h);
-	if (data != NULL)
-		data[idx] = key_slot->pdata;
+		while (sec_hitmask[i]) {
+			uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
 
-	key_idx = positions[idx] + 1;
-	/*
-	 * If key index is 0, force hit to be 0, in case key to be looked up
-	 * is all zero (as in the dummy slot), which would result in a wrong hit
-	 */
-	*hits |= (uint64_t)(hit && !!key_idx)  << idx;
-}
+			uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
+			const struct rte_hash_key *key_slot =
+				(const struct rte_hash_key *)(
+				(const char *)h->key_store +
+				key_idx * h->key_entry_size);
+			/*
+			 * Key index 0 is the dummy slot: the key compare still
+			 * runs (bitwise AND), but it can never count as a hit
+			 */
 
-static inline void
-__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
-			uint32_t num_keys, int32_t *positions,
-			uint64_t *hit_mask, void *data[])
-{
-	uint64_t hits = 0;
-	uint64_t extra_hits_mask = 0;
-	uint64_t lookup_mask, miss_mask;
-	unsigned idx;
-	const void *key_store = h->key_store;
-	int ret;
-	hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX];
-
-	unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
-	const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
-	const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
-	const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
-	const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
-	const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31;
-	hash_sig_t primary_hash10, primary_hash11;
-	hash_sig_t secondary_hash10, secondary_hash11;
-	hash_sig_t primary_hash20, primary_hash21;
-	hash_sig_t secondary_hash20, secondary_hash21;
-
-	lookup_mask = (uint64_t) -1 >> (64 - num_keys);
-	miss_mask = lookup_mask;
-
-	lookup_stage0(&idx00, &lookup_mask, keys);
-	lookup_stage0(&idx01, &lookup_mask, keys);
-
-	idx10 = idx00, idx11 = idx01;
-
-	lookup_stage0(&idx00, &lookup_mask, keys);
-	lookup_stage0(&idx01, &lookup_mask, keys);
-	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
-
-	primary_bkt20 = primary_bkt10;
-	primary_bkt21 = primary_bkt11;
-	secondary_bkt20 = secondary_bkt10;
-	secondary_bkt21 = secondary_bkt11;
-	primary_hash20 = primary_hash10;
-	primary_hash21 = primary_hash11;
-	secondary_hash20 = secondary_hash10;
-	secondary_hash21 = secondary_hash11;
-	idx20 = idx10, idx21 = idx11;
-	idx10 = idx00, idx11 = idx01;
-
-	lookup_stage0(&idx00, &lookup_mask, keys);
-	lookup_stage0(&idx01, &lookup_mask, keys);
-	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
-	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-			secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-			key_store, h);
-	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-			secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-			key_store, h);
-
-	while (lookup_mask) {
-		k_slot30 = k_slot20, k_slot31 = k_slot21;
-		idx30 = idx20, idx31 = idx21;
-		primary_bkt20 = primary_bkt10;
-		primary_bkt21 = primary_bkt11;
-		secondary_bkt20 = secondary_bkt10;
-		secondary_bkt21 = secondary_bkt11;
-		primary_hash20 = primary_hash10;
-		primary_hash21 = primary_hash11;
-		secondary_hash20 = secondary_hash10;
-		secondary_hash21 = secondary_hash11;
-		idx20 = idx10, idx21 = idx11;
-		idx10 = idx00, idx11 = idx01;
-
-		lookup_stage0(&idx00, &lookup_mask, keys);
-		lookup_stage0(&idx01, &lookup_mask, keys);
-		lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-			&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-		lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-			&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
-		lookup_stage2(idx20, primary_hash20, secondary_hash20,
-			primary_bkt20, secondary_bkt20, &k_slot20, positions,
-			&extra_hits_mask, key_store, h);
-		lookup_stage2(idx21, primary_hash21, secondary_hash21,
-			primary_bkt21, secondary_bkt21,	&k_slot21, positions,
-			&extra_hits_mask, key_store, h);
-		lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-		lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-	}
+			if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
+				if (data != NULL)
+					data[i] = key_slot->pdata;
 
-	k_slot30 = k_slot20, k_slot31 = k_slot21;
-	idx30 = idx20, idx31 = idx21;
-	primary_bkt20 = primary_bkt10;
-	primary_bkt21 = primary_bkt11;
-	secondary_bkt20 = secondary_bkt10;
-	secondary_bkt21 = secondary_bkt11;
-	primary_hash20 = primary_hash10;
-	primary_hash21 = primary_hash11;
-	secondary_hash20 = secondary_hash10;
-	secondary_hash21 = secondary_hash11;
-	idx20 = idx10, idx21 = idx11;
-	idx10 = idx00, idx11 = idx01;
-
-	lookup_stage1(idx10, &primary_hash10, &secondary_hash10,
-		&primary_bkt10, &secondary_bkt10, hash_vals, keys, h);
-	lookup_stage1(idx11, &primary_hash11, &secondary_hash11,
-		&primary_bkt11,	&secondary_bkt11, hash_vals, keys, h);
-	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-		key_store, h);
-	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-		key_store, h);
-	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-	k_slot30 = k_slot20, k_slot31 = k_slot21;
-	idx30 = idx20, idx31 = idx21;
-	primary_bkt20 = primary_bkt10;
-	primary_bkt21 = primary_bkt11;
-	secondary_bkt20 = secondary_bkt10;
-	secondary_bkt21 = secondary_bkt11;
-	primary_hash20 = primary_hash10;
-	primary_hash21 = primary_hash11;
-	secondary_hash20 = secondary_hash10;
-	secondary_hash21 = secondary_hash11;
-	idx20 = idx10, idx21 = idx11;
-
-	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
-		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
-		key_store, h);
-	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
-		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
-		key_store, h);
-	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-	k_slot30 = k_slot20, k_slot31 = k_slot21;
-	idx30 = idx20, idx31 = idx21;
-
-	lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h);
-	lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h);
-
-	/* ignore any items we have already found */
-	extra_hits_mask &= ~hits;
-
-	if (unlikely(extra_hits_mask)) {
-		/* run a single search for each remaining item */
-		do {
-			idx = __builtin_ctzl(extra_hits_mask);
-			if (data != NULL) {
-				ret = rte_hash_lookup_with_hash_data(h,
-						keys[idx], hash_vals[idx], &data[idx]);
-				if (ret >= 0)
-					hits |= 1ULL << idx;
-			} else {
-				positions[idx] = rte_hash_lookup_with_hash(h,
-							keys[idx], hash_vals[idx]);
-				if (positions[idx] >= 0)
-					hits |= 1llu << idx;
+				hits |= 1ULL << i;
+				positions[i] = key_idx - 1;
+				goto next_key;
 			}
-			extra_hits_mask &= ~(1llu << idx);
-		} while (extra_hits_mask);
-	}
+			sec_hitmask[i] &= ~(1 << (hit_index));
+		}
 
-	miss_mask &= ~hits;
-	if (unlikely(miss_mask)) {
-		do {
-			idx = __builtin_ctzl(miss_mask);
-			positions[idx] = -ENOENT;
-			miss_mask &= ~(1llu << idx);
-		} while (miss_mask);
+next_key:
+		continue;
 	}
 
 	if (hit_mask != NULL)
@@ -1233,7 +1153,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 	idx = *next % RTE_HASH_BUCKET_ENTRIES;
 
 	/* If current position is empty, go to the next one */
-	while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) {
+	while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
 		(*next)++;
 		/* End of table */
 		if (*next == total_entries)
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 9625fffe..1b8ffed8 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -130,10 +130,12 @@ enum add_key_case {
 };
 
 /** Number of items per bucket. */
-#define RTE_HASH_BUCKET_ENTRIES		4
+#define RTE_HASH_BUCKET_ENTRIES		8
 
 #define NULL_SIGNATURE			0
 
+#define EMPTY_SLOT			0
+
 #define KEY_ALIGNMENT			16
 
 #define LCORE_CACHE_SIZE		64
@@ -151,17 +153,6 @@ struct lcore_cache {
 	void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
 } __rte_cache_aligned;
 
-/* Structure storing both primary and secondary hashes */
-struct rte_hash_signatures {
-	union {
-		struct {
-			hash_sig_t current;
-			hash_sig_t alt;
-		};
-		uint64_t sig;
-	};
-};
-
 /* Structure that stores key-value pair */
 struct rte_hash_key {
 	union {
@@ -172,11 +163,22 @@ struct rte_hash_key {
 	char key[0];
 } __attribute__((aligned(KEY_ALIGNMENT)));
 
+/* All different signature compare functions */
+enum rte_hash_sig_compare_function {
+	RTE_HASH_COMPARE_SCALAR = 0,
+	RTE_HASH_COMPARE_SSE,
+	RTE_HASH_COMPARE_AVX2,
+	RTE_HASH_COMPARE_NUM
+};
+
 /** Bucket structure */
 struct rte_hash_bucket {
-	struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
-	/* Includes dummy key index that always contains index 0 */
-	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1];
+	hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
+
+	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
+
+	hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
+
 	uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
 } __rte_cache_aligned;
 
@@ -185,30 +187,38 @@ struct rte_hash {
 	char name[RTE_HASH_NAMESIZE];   /**< Name of the hash. */
 	uint32_t entries;               /**< Total table entries. */
 	uint32_t num_buckets;           /**< Number of buckets in table. */
-	uint32_t key_len;               /**< Length of hash key. */
+
+	struct rte_ring *free_slots;
+	/**< Ring that stores all indexes of the free slots in the key table */
+	uint8_t hw_trans_mem_support;
+	/**< Hardware transactional memory support */
+	struct lcore_cache *local_free_slots;
+	/**< Local cache per lcore, storing some indexes of the free slots */
+	enum add_key_case add_key; /**< Multi-writer hash add behavior */
+
+	rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+
+	/* Fields used in lookup */
+
+	uint32_t key_len __rte_cache_aligned;
+	/**< Length of hash key. */
 	rte_hash_function hash_func;    /**< Function used to calculate hash. */
 	uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
 	rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
 	/**< Custom function used to compare keys. */
 	enum cmp_jump_table_case cmp_jump_table_idx;
 	/**< Indicates which compare function to use. */
-	uint32_t bucket_bitmask;        /**< Bitmask for getting bucket index
-						from hash signature. */
+	enum rte_hash_sig_compare_function sig_cmp_fn;
+	/**< Indicates which signature compare function to use. */
+	uint32_t bucket_bitmask;
+	/**< Bitmask for getting bucket index from hash signature. */
 	uint32_t key_entry_size;         /**< Size of each key entry. */
 
-	struct rte_ring *free_slots;    /**< Ring that stores all indexes
-						of the free slots in the key table */
 	void *key_store;                /**< Table storing all keys and data */
-	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
-							hash values and key indexes
-							to the key table*/
-	uint8_t hw_trans_mem_support;	/**< Hardware transactional
-							memory support */
-	struct lcore_cache *local_free_slots;
-	/**< Local cache per lcore, storing some indexes of the free slots */
-	enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
-	rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
+	struct rte_hash_bucket *buckets;
+	/**< Table with buckets storing all the hash values and key indexes
+	 * to the key table.
+	 */
 } __rte_cache_aligned;
 
 struct queue_node {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
index ace1bd2e..0c94244a 100644
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -53,10 +53,9 @@ rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
 			*/
 			for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
 				/* Check if slot is available */
-				if (likely(prim_bkt->signatures[i].sig ==
-						NULL_SIGNATURE)) {
-					prim_bkt->signatures[i].current = sig;
-					prim_bkt->signatures[i].alt = alt_hash;
+				if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+					prim_bkt->sig_current[i] = sig;
+					prim_bkt->sig_alt[i] = alt_hash;
 					prim_bkt->key_idx[i] = new_idx;
 					break;
 				}
@@ -102,7 +101,7 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
 				prev_slot = curr_node->prev_slot;
 
 				prev_alt_bkt_idx
-					= prev_bkt->signatures[prev_slot].alt
+					= prev_bkt->sig_alt[prev_slot]
 					    & h->bucket_bitmask;
 
 				if (unlikely(&h->buckets[prev_alt_bkt_idx]
@@ -114,10 +113,10 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
 				 * Cuckoo insert to move elements back to its
 				 * primary bucket if available
 				 */
-				curr_bkt->signatures[curr_slot].alt =
-				    prev_bkt->signatures[prev_slot].current;
-				curr_bkt->signatures[curr_slot].current =
-				    prev_bkt->signatures[prev_slot].alt;
+				curr_bkt->sig_alt[curr_slot] =
+				    prev_bkt->sig_current[prev_slot];
+				curr_bkt->sig_current[curr_slot] =
+				    prev_bkt->sig_alt[prev_slot];
 				curr_bkt->key_idx[curr_slot]
 				    = prev_bkt->key_idx[prev_slot];
 
@@ -126,8 +125,8 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
 				curr_bkt = curr_node->bkt;
 			}
 
-			curr_bkt->signatures[curr_slot].current = sig;
-			curr_bkt->signatures[curr_slot].alt = alt_hash;
+			curr_bkt->sig_current[curr_slot] = sig;
+			curr_bkt->sig_alt[curr_slot] = alt_hash;
 			curr_bkt->key_idx[curr_slot] = new_idx;
 
 			rte_xend();
@@ -172,7 +171,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
 					RTE_HASH_BUCKET_ENTRIES)) {
 		curr_bkt = tail->bkt;
 		for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-			if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+			if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
 				if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
 						tail, i, sig,
 						alt_hash, new_idx) == 0))
@@ -180,7 +179,7 @@ rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
 			}
 
 			/* Enqueue new node and keep prev node info */
-			alt_bkt = &(h->buckets[curr_bkt->signatures[i].alt
+			alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
 						    & h->bucket_bitmask]);
 			head->bkt = alt_bkt;
 			head->prev = tail;
diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h
index a430961d..bd46048f 100644
--- a/lib/librte_hash/rte_fbk_hash.h
+++ b/lib/librte_hash/rte_fbk_hash.h
@@ -115,7 +115,7 @@ struct rte_fbk_hash_table {
 	uint32_t init_val;		/**< For initialising hash function. */
 
 	/** A flat table of all buckets. */
-	union rte_fbk_hash_entry t[0];
+	union rte_fbk_hash_entry t[];
 };
 
 /**
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index d98e98e7..a4886a8c 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -54,6 +54,7 @@ extern "C" {
 #include <stdint.h>
 #include <rte_byteorder.h>
 #include <rte_ip.h>
+#include <rte_common.h>
 
 #ifdef __SSE3__
 #include <rte_vect.h>
@@ -102,6 +103,7 @@ static const __m128i rte_thash_ipv6_bswap_mask = {
 struct rte_ipv4_tuple {
 	uint32_t	src_addr;
 	uint32_t	dst_addr;
+	RTE_STD_C11
 	union {
 		struct {
 			uint16_t dport;
@@ -119,6 +121,7 @@ struct rte_ipv4_tuple {
 struct rte_ipv6_tuple {
 	uint8_t		src_addr[16];
 	uint8_t		dst_addr[16];
+	RTE_STD_C11
 	union {
 		struct {
 			uint16_t dport;
diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile
index e97dfbd3..43f8b1e3 100644
--- a/lib/librte_ip_frag/Makefile
+++ b/lib/librte_ip_frag/Makefile
@@ -54,6 +54,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h
 
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_eal
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_hash
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mbuf
 DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool
 
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 9ac7081c..6708906d 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -124,7 +124,7 @@ struct rte_ip_frag_tbl {
 	struct ip_frag_pkt *last;         /**< last used entry. */
 	struct ip_pkt_list lru;           /**< LRU list for table entries. */
 	struct ip_frag_tbl_stat stat;     /**< statistics counters. */
-	struct ip_frag_pkt pkt[0];        /**< hash table. */
+	__extension__ struct ip_frag_pkt pkt[0]; /**< hash table. */
 };
 
 /** IPv6 fragment extension header */
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
deleted file mode 100644
index c099438c..00000000
--- a/lib/librte_ivshmem/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of Intel Corporation nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-# library name
-LIB = librte_ivshmem.a
-
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-
-EXPORT_MAP := rte_ivshmem_version.map
-
-LIBABIVER := 1
-
-# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
-
-# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
-
-# this lib needs EAL, ring and mempool
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_eal
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_ring
-DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
-
-include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
deleted file mode 100644
index c26edb61..00000000
--- a/lib/librte_ivshmem/rte_ivshmem.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <fcntl.h>
-#include <limits.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <stdio.h>
-
-#include <rte_eal_memconfig.h>
-#include <rte_memory.h>
-#include <rte_ivshmem.h>
-#include <rte_string_fns.h>
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_debug.h>
-#include <rte_spinlock.h>
-#include <rte_common.h>
-#include <rte_malloc.h>
-
-#include "rte_ivshmem.h"
-
-#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
-#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
-#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
-#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
-#define IVSHMEM_MAX_PAGES (1 << 12)
-#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
-#define METADATA_SIZE_ALIGNED \
-	(RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
-
-#define GET_PAGEMAP_ADDR(in,addr,dlm,err)    \
-{                                      \
-	char *end;                         \
-	errno = 0;                         \
-	addr = strtoull((in), &end, 16);   \
-	if (errno != 0 || *end != (dlm)) { \
-		RTE_LOG(ERR, EAL, err);        \
-		goto error;                    \
-	}                                  \
-	(in) = end + 1;                    \
-}
-
-static int pagesz;
-
-struct memseg_cache_entry {
-	char filepath[PATH_MAX];
-	uint64_t offset;
-	uint64_t len;
-};
-
-struct ivshmem_config {
-	struct rte_ivshmem_metadata * metadata;
-	struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
-		/**< account for multiple files per segment case */
-	struct flock lock;
-	rte_spinlock_t sl;
-};
-
-static struct ivshmem_config
-ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
-
-static rte_spinlock_t global_cfg_sl;
-
-static struct ivshmem_config *
-get_config_by_name(const char * name)
-{
-	struct rte_ivshmem_metadata * config;
-	unsigned i;
-
-	for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
-		config = ivshmem_global_config[i].metadata;
-		if (config == NULL)
-			return NULL;
-		if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
-			return &ivshmem_global_config[i];
-	}
-
-	return NULL;
-}
-
-static int
-overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
-{
-	uint64_t start1, end1, start2, end2;
-
-	start1 = s1->addr_64;
-	end1 = s1->addr_64 + s1->len;
-	start2 = s2->addr_64;
-	end2 = s2->addr_64 + s2->len;
-
-	if (start1 >= start2 && start1 < end2)
-		return 1;
-	if (start2 >= start1 && start2 < end1)
-		return 1;
-
-	return 0;
-}
-
-static struct rte_memzone *
-get_memzone_by_addr(const void * addr)
-{
-	struct rte_memzone * tmp, * mz;
-	struct rte_mem_config * mcfg;
-	int i;
-
-	mcfg = rte_eal_get_configuration()->mem_config;
-	mz = NULL;
-
-	/* find memzone for the ring */
-	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
-		tmp = &mcfg->memzone[i];
-
-		if (tmp->addr_64 == (uint64_t) addr) {
-			mz = tmp;
-			break;
-		}
-	}
-
-	return mz;
-}
-
-static int
-entry_compare(const void * a, const void * b)
-{
-	const struct rte_ivshmem_metadata_entry * e1 =
-			(const struct rte_ivshmem_metadata_entry*) a;
-	const struct rte_ivshmem_metadata_entry * e2 =
-			(const struct rte_ivshmem_metadata_entry*) b;
-
-	/* move unallocated zones to the end */
-	if (e1->mz.addr == NULL && e2->mz.addr == NULL)
-		return 0;
-	if (e1->mz.addr == 0)
-		return 1;
-	if (e2->mz.addr == 0)
-		return -1;
-
-	return e1->mz.phys_addr > e2->mz.phys_addr;
-}
-
-/* fills hugepage cache entry for a given start virt_addr */
-static int
-get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
-{
-	uint64_t start_addr, end_addr;
-	char *start,*path_end;
-	char buf[PATH_MAX*2];
-	FILE *f;
-
-	start = NULL;
-	path_end = NULL;
-	start_addr = 0;
-
-	memset(e->filepath, 0, sizeof(e->filepath));
-
-	/* open /proc/self/maps */
-	f = fopen("/proc/self/maps", "r");
-	if (f == NULL) {
-		RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
-		return -1;
-	}
-
-	/* parse maps */
-	while (fgets(buf, sizeof(buf), f) != NULL) {
-
-		/* get endptr to end of start addr */
-		start = buf;
-
-		GET_PAGEMAP_ADDR(start,start_addr,'-',
-				"Cannot find start address in maps!\n");
-
-		/* if start address is bigger than our address, skip */
-		if (start_addr > virt_addr)
-			continue;
-
-		GET_PAGEMAP_ADDR(start,end_addr,' ',
-				"Cannot find end address in maps!\n");
-
-		/* if end address is less than our address, skip */
-		if (end_addr <= virt_addr)
-			continue;
-
-		/* find where the path starts */
-		start = strstr(start, "/");
-
-		if (start == NULL)
-			continue;
-
-		/* at this point, we know that this is our map.
-		 * now let's find the file */
-		path_end = strstr(start, "\n");
-		break;
-	}
-
-	if (path_end == NULL) {
-		RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
-		goto error;
-	}
-
-	/* calculate offset and copy the file path */
-	snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);
-
-	e->offset = virt_addr - start_addr;
-
-	fclose(f);
-
-	return 0;
-error:
-	fclose(f);
-	return -1;
-}
-
-/*
- * This is a complex function. What it does is the following:
- *  1. Goes through metadata and gets list of hugepages involved
- *  2. Sorts the hugepages by size (1G first)
- *  3. Goes through metadata again and writes correct offsets
- *  4. Goes through pages and finds out their filenames, offsets etc.
- */
-static int
-build_config(struct rte_ivshmem_metadata * metadata)
-{
-	struct rte_ivshmem_metadata_entry * e_local;
-	struct memseg_cache_entry * ms_local;
-	struct rte_memseg pages[IVSHMEM_MAX_PAGES];
-	struct rte_ivshmem_metadata_entry *entry;
-	struct memseg_cache_entry * c_entry, * prev_entry;
-	struct ivshmem_config * config;
-	unsigned i, j, mz_iter, ms_iter;
-	uint64_t biggest_len;
-	int biggest_idx;
-
-	/* return error if we try to use an unknown config file */
-	config = get_config_by_name(metadata->name);
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
-		goto fail_e;
-	}
-
-	memset(pages, 0, sizeof(pages));
-
-	e_local = malloc(sizeof(config->metadata->entry));
-	if (e_local == NULL)
-		goto fail_e;
-	ms_local = malloc(sizeof(config->memseg_cache));
-	if (ms_local == NULL)
-		goto fail_ms;
-
-
-	/* make local copies before doing anything */
-	memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
-	memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
-
-	qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
-			entry_compare);
-
-	/* first pass - collect all huge pages */
-	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
-
-		entry = &e_local[mz_iter];
-
-		uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
-				entry->mz.hugepage_sz);
-		uint64_t offset = entry->mz.addr_64 - start_addr;
-		uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
-				entry->mz.hugepage_sz);
-
-		if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
-			continue;
-
-		int start_page;
-
-		/* find first unused page - mz are phys_addr sorted so we don't have to
-		 * look out for holes */
-		for (i = 0; i < RTE_DIM(pages); i++) {
-
-			/* skip if we already have this page */
-			if (pages[i].addr_64 == start_addr) {
-				start_addr += entry->mz.hugepage_sz;
-				len -= entry->mz.hugepage_sz;
-				continue;
-			}
-			/* we found a new page */
-			else if (pages[i].addr_64 == 0) {
-				start_page = i;
-				break;
-			}
-		}
-		if (i == RTE_DIM(pages)) {
-			RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
-			goto fail;
-		}
-
-		/* populate however many pages the memzone has */
-		for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
-
-			pages[i].addr_64 = start_addr;
-			pages[i].len = entry->mz.hugepage_sz;
-			start_addr += entry->mz.hugepage_sz;
-			len -= entry->mz.hugepage_sz;
-		}
-		/* if there's still length left */
-		if (len != 0) {
-			RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
-			goto fail;
-		}
-	}
-
-	/* second pass - sort pages by size */
-	for (i = 0; i < RTE_DIM(pages); i++) {
-
-		if (pages[i].addr == NULL)
-			break;
-
-		biggest_len = 0;
-		biggest_idx = -1;
-
-		/*
-		 * browse all entries starting at 'i', and find the
-		 * entry with the smallest addr
-		 */
-		for (j=i; j< RTE_DIM(pages); j++) {
-			if (pages[j].addr == NULL)
-					break;
-			if (biggest_len == 0 ||
-				pages[j].len > biggest_len) {
-				biggest_len = pages[j].len;
-				biggest_idx = j;
-			}
-		}
-
-		/* should not happen */
-		if (biggest_idx == -1) {
-			RTE_LOG(ERR, EAL, "Error sorting by size!\n");
-			goto fail;
-		}
-		if (i != (unsigned) biggest_idx) {
-			struct rte_memseg tmp;
-
-			memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));
-
-			/* we don't want to break contiguousness, so instead of just
-			 * swapping segments, we move all the preceding segments to the
-			 * right and then put the old segment @ biggest_idx in place of
-			 * segment @ i */
-			for (j = biggest_idx - 1; j >= i; j--) {
-				memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg));
-				memset(&pages[j], 0, sizeof(struct rte_memseg));
-				if (j == 0)
-					break;
-			}
-
-			/* put old biggest segment to its new place */
-			memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
-		}
-	}
-
-	/* third pass - write correct offsets */
-	for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
-
-		uint64_t offset = 0;
-
-		entry = &e_local[mz_iter];
-
-		if (entry->mz.addr_64 == 0)
-			break;
-
-		/* find page for current memzone */
-		for (i = 0; i < RTE_DIM(pages); i++) {
-			/* we found our page */
-			if (entry->mz.addr_64 >= pages[i].addr_64 &&
-					entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
-				entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
-						offset;
-				break;
-			}
-			offset += pages[i].len;
-		}
-		if (i == RTE_DIM(pages)) {
-			RTE_LOG(ERR, EAL, "Page not found!\n");
-			goto fail;
-		}
-	}
-
-	ms_iter = 0;
-	prev_entry = NULL;
-
-	/* fourth pass - create proper memseg cache */
-	for (i = 0; i < RTE_DIM(pages) &&
-			ms_iter <= RTE_DIM(config->memseg_cache); i++) {
-		if (pages[i].addr_64 == 0)
-			break;
-
-
-		if (ms_iter == RTE_DIM(pages)) {
-			RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
-			goto fail;
-		}
-
-		c_entry = &ms_local[ms_iter];
-		c_entry->len = pages[i].len;
-
-		if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
-			goto fail;
-
-		/* if previous entry has the same filename and is contiguous,
-		 * clear current entry and increase previous entry's length
-		 */
-		if (prev_entry != NULL &&
-				strncmp(c_entry->filepath, prev_entry->filepath,
-				sizeof(c_entry->filepath)) == 0 &&
-				prev_entry->offset + prev_entry->len == c_entry->offset) {
-			prev_entry->len += pages[i].len;
-			memset(c_entry, 0, sizeof(struct memseg_cache_entry));
-		}
-		else {
-			prev_entry = c_entry;
-			ms_iter++;
-		}
-	}
-
-	/* update current configuration with new valid data */
-	memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
-	memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
-
-	free(ms_local);
-	free(e_local);
-
-	return 0;
-fail:
-	free(ms_local);
-fail_ms:
-	free(e_local);
-fail_e:
-	return -1;
-}
-
-static int
-add_memzone_to_metadata(const struct rte_memzone * mz,
-		struct ivshmem_config * config)
-{
-	struct rte_ivshmem_metadata_entry * entry;
-	unsigned i, idx;
-	struct rte_mem_config *mcfg;
-
-	if (mz->len == 0) {
-		RTE_LOG(ERR, EAL, "Trying to add an empty memzone\n");
-		return -1;
-	}
-
-	rte_spinlock_lock(&config->sl);
-
-	mcfg = rte_eal_get_configuration()->mem_config;
-
-	/* it prevents the memzone being freed while we add it to the metadata */
-	rte_rwlock_write_lock(&mcfg->mlock);
-
-	/* find free slot in this config */
-	for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
-		entry = &config->metadata->entry[i];
-
-		if (&entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
-			RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
-			goto fail;
-		}
-
-		/* if addr is zero, the memzone is probably free */
-		if (entry->mz.addr_64 == 0) {
-			RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
-					mz->name, mz->addr, config->metadata->name);
-			memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
-
-			/* run config file parser */
-			if (build_config(config->metadata) < 0)
-				goto fail;
-
-			break;
-		}
-	}
-
-	/* if we reached the maximum, that means we have no place in config */
-	if (i == RTE_DIM(config->metadata->entry)) {
-		RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
-				config->metadata->name);
-		goto fail;
-	}
-
-	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
-	idx = idx / sizeof(struct rte_memzone);
-
-	/* mark the memzone not freeable */
-	mcfg->memzone[idx].ioremap_addr = mz->phys_addr;
-
-	rte_rwlock_write_unlock(&mcfg->mlock);
-	rte_spinlock_unlock(&config->sl);
-	return 0;
-fail:
-	rte_rwlock_write_unlock(&mcfg->mlock);
-	rte_spinlock_unlock(&config->sl);
-	return -1;
-}
-
-static int
-add_ring_to_metadata(const struct rte_ring * r,
-		struct ivshmem_config * config)
-{
-	struct rte_memzone * mz;
-
-	mz = get_memzone_by_addr(r);
-
-	if (!mz) {
-		RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
-		return -1;
-	}
-
-	return add_memzone_to_metadata(mz, config);
-}
-
-static int
-add_mempool_memzone_to_metadata(const void *addr,
-		struct ivshmem_config *config)
-{
-	struct rte_memzone *mz;
-
-	mz = get_memzone_by_addr(addr);
-
-	if (!mz) {
-		RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
-		return -1;
-	}
-
-	return add_memzone_to_metadata(mz, config);
-}
-
-static int
-add_mempool_to_metadata(const struct rte_mempool *mp,
-		struct ivshmem_config *config)
-{
-	struct rte_mempool_memhdr *memhdr;
-	int ret;
-
-	ret = add_mempool_memzone_to_metadata(mp, config);
-	if (ret < 0)
-		return -1;
-
-	STAILQ_FOREACH(memhdr, &mp->mem_list, next) {
-		ret = add_mempool_memzone_to_metadata(memhdr->addr, config);
-		if (ret < 0)
-			return -1;
-	}
-
-	/* mempool consists of memzone and ring */
-	return add_ring_to_metadata(mp->pool_data, config);
-}
-
-int
-rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
-{
-	struct ivshmem_config * config;
-
-	if (name == NULL || r == NULL)
-		return -1;
-
-	config = get_config_by_name(name);
-
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
-		return -1;
-	}
-
-	return add_ring_to_metadata(r, config);
-}
-
-int
-rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
-{
-	struct ivshmem_config * config;
-
-	if (name == NULL || mz == NULL)
-		return -1;
-
-	config = get_config_by_name(name);
-
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
-		return -1;
-	}
-
-	return add_memzone_to_metadata(mz, config);
-}
-
-int
-rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
-{
-	struct ivshmem_config * config;
-
-	if (name == NULL || mp == NULL)
-		return -1;
-
-	config = get_config_by_name(name);
-
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
-		return -1;
-	}
-
-	return add_mempool_to_metadata(mp, config);
-}
-
-static inline void
-ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
-{
-	snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
-}
-
-
-
-static inline
-void *ivshmem_metadata_create(const char *name, size_t size,
-		struct flock *lock)
-{
-	int retval, fd;
-	void *metadata_addr;
-	char pathname[PATH_MAX];
-
-	ivshmem_config_path(pathname, sizeof(pathname), name);
-
-	fd = open(pathname, O_RDWR | O_CREAT, 0660);
-	if (fd < 0) {
-		RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
-		return NULL;
-	}
-
-	size = METADATA_SIZE_ALIGNED;
-
-	retval = fcntl(fd, F_SETLK, lock);
-	if (retval < 0){
-		close(fd);
-		RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
-				"process using it?\n", pathname);
-		return NULL;
-	}
-
-	retval = ftruncate(fd, size);
-	if (retval < 0){
-		close(fd);
-		RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
-		return NULL;
-	}
-
-	metadata_addr = mmap(NULL, size,
-				PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-
-	if (metadata_addr == MAP_FAILED){
-		RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
-
-		/* we don't care if we can't unlock */
-		fcntl(fd, F_UNLCK, lock);
-		close(fd);
-
-		return NULL;
-	}
-
-	return metadata_addr;
-}
-
-int rte_ivshmem_metadata_create(const char *name)
-{
-	struct ivshmem_config * ivshmem_config;
-	unsigned index;
-
-	if (pagesz == 0)
-		pagesz = getpagesize();
-
-	if (name == NULL)
-		return -1;
-
-	rte_spinlock_lock(&global_cfg_sl);
-
-	for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
-		if (ivshmem_global_config[index].metadata == NULL) {
-			ivshmem_config = &ivshmem_global_config[index];
-			break;
-		}
-	}
-
-	if (index == RTE_DIM(ivshmem_global_config)) {
-		RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
-		"Maximum has been reached\n");
-		rte_spinlock_unlock(&global_cfg_sl);
-		return -1;
-	}
-
-	ivshmem_config->lock.l_type = F_WRLCK;
-	ivshmem_config->lock.l_whence = SEEK_SET;
-
-	ivshmem_config->lock.l_start = 0;
-	ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;
-
-	ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *)
-			ivshmem_metadata_create(
-					name,
-					sizeof(struct rte_ivshmem_metadata),
-					&ivshmem_config->lock));
-
-	if (ivshmem_global_config[index].metadata == NULL) {
-		rte_spinlock_unlock(&global_cfg_sl);
-		return -1;
-	}
-
-	/* Metadata setup */
-	memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
-	ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
-	snprintf(ivshmem_config->metadata->name,
-			sizeof(ivshmem_config->metadata->name), "%s", name);
-
-	rte_spinlock_unlock(&global_cfg_sl);
-
-	return 0;
-}
-
-int
-rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
-{
-	const struct memseg_cache_entry * ms_cache, *entry;
-	struct ivshmem_config * config;
-	char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
-	char cfg_file_path[PATH_MAX];
-	unsigned remaining_len, tmplen, iter;
-	uint64_t shared_mem_size, zero_size, total_size;
-
-	if (buffer == NULL || name == NULL)
-		return -1;
-
-	config = get_config_by_name(name);
-
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
-		return -1;
-	}
-
-	rte_spinlock_lock(&config->sl);
-
-	/* prepare metadata file path */
-	snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
-			config->metadata->name);
-
-	ms_cache = config->memseg_cache;
-
-	cmdline_ptr = cmdline;
-	remaining_len = sizeof(cmdline);
-
-	shared_mem_size = 0;
-	iter = 0;
-
-	while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) {
-
-		entry = &ms_cache[iter];
-
-		/* Offset and sizes within the current pathname */
-		tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
-				entry->filepath, entry->offset, entry->len);
-
-		shared_mem_size += entry->len;
-
-		cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
-		remaining_len -= tmplen;
-
-		if (remaining_len == 0) {
-			RTE_LOG(ERR, EAL, "Command line too long!\n");
-			rte_spinlock_unlock(&config->sl);
-			return -1;
-		}
-
-		iter++;
-	}
-
-	total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
-	zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
-
-	/* add /dev/zero to command-line to fill the space */
-	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
-			"/dev/zero",
-			(uint64_t)0x0,
-			zero_size);
-
-	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
-	remaining_len -= tmplen;
-
-	if (remaining_len == 0) {
-		RTE_LOG(ERR, EAL, "Command line too long!\n");
-		rte_spinlock_unlock(&config->sl);
-		return -1;
-	}
-
-	/* add metadata file to the end of command-line */
-	tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
-			cfg_file_path,
-			(uint64_t)0x0,
-			METADATA_SIZE_ALIGNED);
-
-	cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
-	remaining_len -= tmplen;
-
-	if (remaining_len == 0) {
-		RTE_LOG(ERR, EAL, "Command line too long!\n");
-		rte_spinlock_unlock(&config->sl);
-		return -1;
-	}
-
-	/* if current length of the command line is bigger than the buffer supplied
-	 * by the user, or if command-line is bigger than what IVSHMEM accepts */
-	if ((sizeof(cmdline) - remaining_len) > size) {
-		RTE_LOG(ERR, EAL, "Buffer is too short!\n");
-		rte_spinlock_unlock(&config->sl);
-		return -1;
-	}
-	/* complete the command-line */
-	snprintf(buffer, size,
-			IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
-			total_size >> 20,
-			cmdline);
-
-	rte_spinlock_unlock(&config->sl);
-
-	return 0;
-}
-
-void
-rte_ivshmem_metadata_dump(FILE *f, const char *name)
-{
-	unsigned i = 0;
-	struct ivshmem_config * config;
-	struct rte_ivshmem_metadata_entry *entry;
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-	uint64_t addr;
-	uint64_t end, hugepage_sz;
-	struct memseg_cache_entry e;
-#endif
-
-	if (name == NULL)
-		return;
-
-	/* return error if we try to use an unknown config file */
-	config = get_config_by_name(name);
-	if (config == NULL) {
-		RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
-		return;
-	}
-
-	rte_spinlock_lock(&config->sl);
-
-	entry = &config->metadata->entry[0];
-
-	while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) {
-
-		fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, "
-			"virt:%-15p, off:0x%-15lx\n",
-			i,
-			entry->mz.name,
-			entry->mz.phys_addr,
-			entry->mz.len,
-			entry->mz.addr,
-			entry->offset);
-		i++;
-
-#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
-		fprintf(f, "\tHugepage files:\n");
-
-		hugepage_sz = entry->mz.hugepage_sz;
-		addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
-		end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
-				hugepage_sz);
-
-		for (; addr < end; addr += hugepage_sz) {
-			memset(&e, 0, sizeof(e));
-
-			get_hugefile_by_virt_addr(addr, &e);
-
-			fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
-					addr, addr + hugepage_sz, e.offset, e.filepath);
-		}
-#endif
-		entry++;
-	}
-
-	rte_spinlock_unlock(&config->sl);
-}
diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h
deleted file mode 100644
index a5d36d6b..00000000
--- a/lib/librte_ivshmem/rte_ivshmem.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RTE_IVSHMEM_H_
-#define RTE_IVSHMEM_H_
-
-#include <rte_memzone.h>
-#include <rte_mempool.h>
-
-/**
- * @file
- *
- * The RTE IVSHMEM interface provides functions to create metadata files
- * describing memory segments to be shared via QEMU IVSHMEM.
- */
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define IVSHMEM_MAGIC 0x0BADC0DE
-#define IVSHMEM_NAME_LEN 32
-
-/**
- * Structure that holds IVSHMEM shared metadata entry.
- */
-struct rte_ivshmem_metadata_entry {
-	struct rte_memzone mz;	/**< shared memzone */
-	uint64_t offset;	/**< offset of memzone within IVSHMEM device */
-};
-
-/**
- * Structure that holds IVSHMEM metadata.
- */
-struct rte_ivshmem_metadata {
-	int magic_number;				/**< magic number */
-	char name[IVSHMEM_NAME_LEN];	/**< name of the metadata file */
-	struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
-			/**< metadata entries */
-};
-
-/**
- * Creates metadata file with a given name
- *
- * @param name
- *  Name of metadata file to be created
- *
- * @return
- *  - On success, zero
- *  - On failure, a negative value
- */
-int rte_ivshmem_metadata_create(const char * name);
-
-/**
- * Adds memzone to a specific metadata file
- *
- * @param mz
- *  Memzone to be added
- * @param md_name
- *  Name of metadata file for the memzone to be added to
- *
- * @return
- *  - On success, zero
- *  - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz,
-		const char * md_name);
-
-/**
- * Adds a ring descriptor to a specific metadata file
- *
- * @param r
- *  Ring descriptor to be added
- * @param md_name
- *  Name of metadata file for the ring to be added to
- *
- * @return
- *  - On success, zero
- *  - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_ring(const struct rte_ring * r,
-		const char * md_name);
-
-/**
- * Adds a mempool to a specific metadata file
- *
- * @param mp
- *  Mempool to be added
- * @param md_name
- *  Name of metadata file for the mempool to be added to
- *
- * @return
- *  - On success, zero
- *  - On failure, a negative value
- */
-int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp,
-		const char * md_name);
-
-
-/**
- * Generates the QEMU command-line for IVSHMEM device for a given metadata file.
- * This function is to be called after all the objects were added.
- *
- * @param buffer
- *  Buffer to be filled with the command line arguments.
- * @param size
- *  Size of the buffer.
- * @param name
- *  Name of metadata file to generate QEMU command-line parameters for
- *
- * @return
- *  - On success, zero
- *  - On failure, a negative value
- */
-int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
-		const char *name);
-
-
-/**
- * Dump all metadata entries from a given metadata file to the console.
- *
- * @param f
- *   A pointer to a file for output
- * @name
- *  Name of the metadata file to be dumped to console.
- */
-void rte_ivshmem_metadata_dump(FILE *f, const char *name);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* RTE_IVSHMEM_H_ */
diff --git a/lib/librte_ivshmem/rte_ivshmem_version.map b/lib/librte_ivshmem/rte_ivshmem_version.map
deleted file mode 100644
index 5a393ddc..00000000
--- a/lib/librte_ivshmem/rte_ivshmem_version.map
+++ /dev/null
@@ -1,12 +0,0 @@
-DPDK_2.0 {
-	global:
-
-	rte_ivshmem_metadata_add_mempool;
-	rte_ivshmem_metadata_add_memzone;
-	rte_ivshmem_metadata_add_ring;
-	rte_ivshmem_metadata_cmdline_generate;
-	rte_ivshmem_metadata_create;
-	rte_ivshmem_metadata_dump;
-
-	local: *;
-};
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 3028fd43..a80cefd2 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -210,14 +210,18 @@ rte_kni_init(unsigned int max_kni_ifaces)
 	if (max_kni_ifaces == 0) {
 		RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
 							max_kni_ifaces);
-		rte_panic("Unable to initialize KNI\n");
+		RTE_LOG(ERR, KNI, "Unable to initialize KNI\n");
+		return;
 	}
 
 	/* Check FD and open */
 	if (kni_fd < 0) {
 		kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
-		if (kni_fd < 0)
-			rte_panic("Can not open /dev/%s\n", KNI_DEVICE);
+		if (kni_fd < 0) {
+			RTE_LOG(ERR, KNI,
+				"Can not open /dev/%s\n", KNI_DEVICE);
+			return;
+		}
 	}
 
 	/* Allocate slot objects */
@@ -307,8 +311,8 @@ rte_kni_init(unsigned int max_kni_ifaces)
 	return;
 
 kni_fail:
-	rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n",
-			 max_kni_ifaces);
+	RTE_LOG(ERR, KNI, "Unable to allocate memory for max_kni_ifaces:%d. "
+		"Increase the amount of hugepages memory\n", max_kni_ifaces);
 }
 
 
@@ -321,9 +325,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	struct rte_kni_device_info dev_info;
 	struct rte_kni *ctx;
 	char intf_name[RTE_KNI_NAMESIZE];
-	char mz_name[RTE_MEMZONE_NAMESIZE];
 	const struct rte_memzone *mz;
-	const struct rte_mempool *mp;
 	struct rte_kni_memzone_slot *slot = NULL;
 
 	if (!pktmbuf_pool || !conf || !conf->name[0])
@@ -414,19 +416,6 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	dev_info.sync_va = mz->addr;
 	dev_info.sync_phys = mz->phys_addr;
 
-
-	/* MBUF mempool */
-	snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT,
-		pktmbuf_pool->name);
-	mz = rte_memzone_lookup(mz_name);
-	KNI_MEM_CHECK(mz == NULL);
-	mp = (struct rte_mempool *)mz->addr;
-	/* KNI currently requires to have only one memory chunk */
-	if (mp->nb_mem_chunks != 1)
-		goto kni_fail;
-
-	dev_info.mbuf_va = STAILQ_FIRST(&mp->mem_list)->addr;
-	dev_info.mbuf_phys = STAILQ_FIRST(&mp->mem_list)->phys_addr;
 	ctx->pktmbuf_pool = pktmbuf_pool;
 	ctx->group_id = conf->group_id;
 	ctx->slot_id = slot->id;
@@ -462,6 +451,20 @@ kni_free_fifo(struct rte_kni_fifo *fifo)
 	} while (ret);
 }
 
+static void
+kni_free_fifo_phy(struct rte_kni_fifo *fifo)
+{
+	void *mbuf_phys;
+	int ret;
+
+	do {
+		ret = kni_fifo_get(fifo, &mbuf_phys, 1);
+		/* TODO: free mbufs. rx_q/alloc_q entries are va2pa() results,
+		 * so for now they are only dequeued and dropped, one at a time,
+		 * until kni_fifo_get() returns 0. */
+	} while (ret);
+}
+
 int
 rte_kni_release(struct rte_kni *kni)
 {
@@ -479,8 +482,8 @@ rte_kni_release(struct rte_kni *kni)
 
 	/* mbufs in all fifo should be released, except request/response */
 	kni_free_fifo(kni->tx_q);
-	kni_free_fifo(kni->rx_q);
-	kni_free_fifo(kni->alloc_q);
+	kni_free_fifo_phy(kni->rx_q);
+	kni_free_fifo_phy(kni->alloc_q);
 	kni_free_fifo(kni->free_q);
 
 	slot_id = kni->slot_id;
@@ -490,8 +493,9 @@ rte_kni_release(struct rte_kni *kni)
 
 	/* Release memzone */
 	if (slot_id > kni_memzone_pool.max_ifaces) {
-		rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n",
+		RTE_LOG(ERR, KNI, "KNI pool: corrupted slot ID: %d, max: %d\n",
 			slot_id, kni_memzone_pool.max_ifaces);
+		return -1;
 	}
 	kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]);
 
@@ -513,7 +517,8 @@ rte_kni_handle_request(struct rte_kni *kni)
 		return 0; /* It is OK of can not getting the request mbuf */
 
 	if (req != kni->sync_addr) {
-		rte_panic("Wrong req pointer %p\n", req);
+		RTE_LOG(ERR, KNI, "Wrong req pointer %p\n", req);
+		return -1;
 	}
 
 	/* Analyze the request and call the relevant actions for it */
@@ -544,10 +549,25 @@ rte_kni_handle_request(struct rte_kni *kni)
 	return 0;
 }
 
+static void *
+va2pa(struct rte_mbuf *m)
+{
+	return (void *)((unsigned long)m -
+			((unsigned long)m->buf_addr -
+			 (unsigned long)m->buf_physaddr));
+}
+
 unsigned
 rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
-	unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);
+	void *phy_mbufs[num];
+	unsigned int ret;
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		phy_mbufs[i] = va2pa(mbufs[i]);
+
+	ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
 
 	/* Get mbufs from free_q and then free them */
 	kni_free_mbufs(kni);
@@ -585,6 +605,7 @@ kni_allocate_mbufs(struct rte_kni *kni)
 {
 	int i, ret;
 	struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
+	void *phys[MAX_MBUF_BURST_NUM];
 
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) !=
 			 offsetof(struct rte_kni_mbuf, pool));
@@ -614,13 +635,14 @@ kni_allocate_mbufs(struct rte_kni *kni)
 			RTE_LOG(ERR, KNI, "Out of memory\n");
 			break;
 		}
+		phys[i] = va2pa(pkts[i]);
 	}
 
 	/* No pkt mbuf alocated */
 	if (i <= 0)
 		return;
 
-	ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i);
+	ret = kni_fifo_put(kni->alloc_q, phys, i);
 
 	/* Check if any mbufs not put into alloc_q, and then free them */
 	if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) {
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index 7363e6cf..37deb472 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -42,7 +42,7 @@
  * interfaces that may be used by the RTE application to receive/transmit
  * packets from/to Linux kernel net interfaces.
  *
- * This library provide two APIs to burst receive packets from KNI interfaces,
+ * This library provides two APIs to burst receive packets from KNI interfaces,
  * and burst transmit packets to KNI interfaces.
  */
 
@@ -88,6 +88,7 @@ struct rte_kni_conf {
 	struct rte_pci_addr addr;
 	struct rte_pci_id id;
 
+	__extension__
 	uint8_t force_bind : 1; /* Flag to bind kernel thread */
 };
 
diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile
index 656ade27..3dc549dc 100644
--- a/lib/librte_lpm/Makefile
+++ b/lib/librte_lpm/Makefile
@@ -51,6 +51,8 @@ ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
 SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_neon.h
 else ifeq ($(CONFIG_RTE_ARCH_X86),y)
 SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_sse.h
+else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
+SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_altivec.h
 endif
 
 # this lib needs eal
diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index e1b5d94a..8c15c4c9 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -942,14 +942,9 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
 
 		/* Insert new rule into the tbl8 entry. */
 		for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
-			if (!lpm->tbl8[i].valid ||
-					lpm->tbl8[i].depth <= depth) {
-				lpm->tbl8[i].valid = VALID;
-				lpm->tbl8[i].depth = depth;
-				lpm->tbl8[i].next_hop = next_hop;
-
-				continue;
-			}
+			lpm->tbl8[i].valid = VALID;
+			lpm->tbl8[i].depth = depth;
+			lpm->tbl8[i].next_hop = next_hop;
 		}
 
 		/*
@@ -1073,14 +1068,9 @@ add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
 
 		/* Insert new rule into the tbl8 entry. */
 		for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
-			if (!lpm->tbl8[i].valid ||
-					lpm->tbl8[i].depth <= depth) {
-				lpm->tbl8[i].valid = VALID;
-				lpm->tbl8[i].depth = depth;
-				lpm->tbl8[i].next_hop = next_hop;
-
-				continue;
-			}
+			lpm->tbl8[i].valid = VALID;
+			lpm->tbl8[i].depth = depth;
+			lpm->tbl8[i].next_hop = next_hop;
 		}
 
 		/*
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index 2df1d672..682865e4 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -93,12 +93,14 @@ extern "C" {
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 /** @internal Tbl24 entry structure. */
+__extension__
 struct rte_lpm_tbl_entry_v20 {
 	/**
 	 * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
 	 * a group index pointing to a tbl8 structure (tbl24 only, when
 	 * valid_group is set)
 	 */
+	RTE_STD_C11
 	union {
 		uint8_t next_hop;
 		uint8_t group_idx;
@@ -116,6 +118,7 @@ struct rte_lpm_tbl_entry_v20 {
 	uint8_t depth       :6; /**< Rule depth. */
 };
 
+__extension__
 struct rte_lpm_tbl_entry {
 	/**
 	 * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
@@ -137,6 +140,7 @@ struct rte_lpm_tbl_entry {
 };
 
 #else
+__extension__
 struct rte_lpm_tbl_entry_v20 {
 	uint8_t depth       :6;
 	uint8_t valid_group :1;
@@ -147,6 +151,7 @@ struct rte_lpm_tbl_entry_v20 {
 	};
 };
 
+__extension__
 struct rte_lpm_tbl_entry {
 	uint32_t depth       :6;
 	uint32_t valid_group :1;
@@ -193,7 +198,7 @@ struct rte_lpm_v20 {
 			__rte_cache_aligned; /**< LPM tbl24 table. */
 	struct rte_lpm_tbl_entry_v20 tbl8[RTE_LPM_TBL8_NUM_ENTRIES]
 			__rte_cache_aligned; /**< LPM tbl8 table. */
-	struct rte_lpm_rule_v20 rules_tbl[0] \
+	struct rte_lpm_rule_v20 rules_tbl[]
 			__rte_cache_aligned; /**< LPM rules. */
 };
 
@@ -480,6 +485,8 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
 
 #if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
 #include "rte_lpm_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "rte_lpm_altivec.h"
 #else
 #include "rte_lpm_sse.h"
 #endif
diff --git a/lib/librte_lpm/rte_lpm_altivec.h b/lib/librte_lpm/rte_lpm_altivec.h
new file mode 100644
index 00000000..e26e0875
--- /dev/null
+++ b/lib/librte_lpm/rte_lpm_altivec.h
@@ -0,0 +1,154 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_LPM_ALTIVEC_H_
+#define _RTE_LPM_ALTIVEC_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	vector signed int i24;
+	rte_xmm_t i8;
+	uint32_t tbl[4];
+	uint64_t idx, pt, pt2;
+	const uint32_t *ptbl;
+
+	const uint32_t mask = UINT8_MAX;
+	const vector signed int mask8 = (xmm_t){mask, mask, mask, mask};
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries
+	 * as one 64-bit value (0x0300000003000000).
+	 */
+	const uint64_t mask_xv =
+		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
+		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries
+	 * as one 64-bit value (0x0100000001000000).
+	 */
+	const uint64_t mask_v =
+		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
+		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32);
+
+	/* get 4 indexes for tbl24[]. */
+	i24 = vec_sr((xmm_t) ip,
+		(vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
+
+	/* extract values from tbl24[] */
+	idx = (uint32_t)i24[0];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[0] = *ptbl;
+
+	idx = (uint32_t) i24[1];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[1] = *ptbl;
+
+	idx = (uint32_t) i24[2];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[2] = *ptbl;
+
+	idx = (uint32_t) i24[3];
+	idx = idx < (1<<24) ? idx : (1<<24)-1;
+	ptbl = (const uint32_t *)&lpm->tbl24[idx];
+	tbl[3] = *ptbl;
+
+	/* get 4 indexes for tbl8[]. */
+	i8.x = vec_and(ip, mask8);
+
+	pt = (uint64_t)tbl[0] |
+		(uint64_t)tbl[1] << 32;
+	pt2 = (uint64_t)tbl[2] |
+		(uint64_t)tbl[3] << 32;
+
+	/* search successfully finished for all 4 IP addresses. */
+	if (likely((pt & mask_xv) == mask_v) &&
+			likely((pt2 & mask_xv) == mask_v)) {
+		*(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES;
+		*(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES;
+		return;
+	}
+
+	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[0] = i8.u32[0] +
+			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]];
+		tbl[0] = *ptbl;
+	}
+	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[1] = i8.u32[1] +
+			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]];
+		tbl[1] = *ptbl;
+	}
+	if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[2] = i8.u32[2] +
+			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]];
+		tbl[2] = *ptbl;
+	}
+	if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		i8.u32[3] = i8.u32[3] +
+			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]];
+		tbl[3] = *ptbl;
+	}
+
+	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv;
+	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv;
+	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv;
+	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_ALTIVEC_H_ */
diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h
index 7c643159..7efd9a0d 100644
--- a/lib/librte_lpm/rte_lpm_neon.h
+++ b/lib/librte_lpm/rte_lpm_neon.h
@@ -43,6 +43,7 @@
 #include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_vect.h>
+#include <rte_lpm.h>
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h
index da830995..ef33c6a1 100644
--- a/lib/librte_lpm/rte_lpm_sse.h
+++ b/lib/librte_lpm/rte_lpm_sse.h
@@ -38,6 +38,7 @@
 #include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_vect.h>
+#include <rte_lpm.h>
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 8d62b0d3..4ae2e8c8 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -41,10 +41,10 @@ EXPORT_MAP := rte_mbuf_version.map
 LIBABIVER := 2
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c
+SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c rte_mbuf_ptype.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h rte_mbuf_ptype.h
 
 # this lib needs eal
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index d2c87526..63f43c89 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -53,12 +53,12 @@
 #include <rte_lcore.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_mempool.h>
 #include <rte_mbuf.h>
 #include <rte_string_fns.h>
 #include <rte_hexdump.h>
 #include <rte_errno.h>
+#include <rte_memcpy.h>
 
 /*
  * ctrlmbuf constructor, given as a callback function to
@@ -264,6 +264,40 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 	}
 }
 
+/* read len bytes at offset off of mbuf m; NULL if out of range (internal) */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+	uint32_t len, void *buf)
+{
+	const struct rte_mbuf *seg = m;
+	uint32_t buf_off = 0, copy_len;
+
+	/* same test as off + len > pkt_len, but immune to 32-bit wrap-around */
+	if (off > rte_pktmbuf_pkt_len(m) || len > rte_pktmbuf_pkt_len(m) - off)
+		return NULL;
+
+	while (off >= rte_pktmbuf_data_len(seg)) {
+		off -= rte_pktmbuf_data_len(seg);
+		seg = seg->next;
+	}
+
+	if (len <= rte_pktmbuf_data_len(seg) - off)
+		return rte_pktmbuf_mtod_offset(seg, char *, off);
+
+	/* rare case: header is split among several segments */
+	while (len > 0) {
+		copy_len = rte_pktmbuf_data_len(seg) - off;
+		copy_len = copy_len > len ? len : copy_len;
+		rte_memcpy((char *)buf + buf_off,
+			rte_pktmbuf_mtod_offset(seg, char *, off), copy_len);
+		off = 0;
+		buf_off += copy_len;
+		len -= copy_len;
+		seg = seg->next;
+	}
+
+	return buf;
+}
+
 /*
  * Get the name of a RX offload flag. Must be kept synchronized with flag
  * definitions in rte_mbuf.h.
@@ -275,16 +309,78 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
 	case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
 	case PKT_RX_FDIR: return "PKT_RX_FDIR";
 	case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
+	case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD";
+	case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE";
 	case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
+	case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD";
+	case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE";
 	case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
 	case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
 	case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
 	case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
 	case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
+	case PKT_RX_LRO: return "PKT_RX_LRO";
 	default: return NULL;
 	}
 }
 
+struct flag_mask {
+	uint64_t flag;
+	uint64_t mask;
+	const char *default_name;
+};
+
+/* write the names of the rx ol flags set in mask into buf, space-separated */
+int
+rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+	const struct flag_mask rx_flags[] = {
+		{ PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL },
+		{ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
+		{ PKT_RX_FDIR, PKT_RX_FDIR, NULL },
+		{ PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
+		{ PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL },
+		{ PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL },
+		{ PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK,
+		  "PKT_RX_L4_CKSUM_UNKNOWN" },
+		{ PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL },
+		{ PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL },
+		{ PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL },
+		{ PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK,
+		  "PKT_RX_IP_CKSUM_UNKNOWN" },
+		{ PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL },
+		{ PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
+		{ PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
+		{ PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
+		{ PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
+		{ PKT_RX_LRO, PKT_RX_LRO, NULL },
+	};
+	const char *name;
+	unsigned int i;
+	int ret;
+
+	if (buflen == 0)
+		return -1;
+
+	buf[0] = '\0';
+	for (i = 0; i < RTE_DIM(rx_flags); i++) {
+		if ((mask & rx_flags[i].mask) != rx_flags[i].flag)
+			continue;
+		name = rte_get_rx_ol_flag_name(rx_flags[i].flag);
+		if (name == NULL)
+			name = rx_flags[i].default_name;
+		ret = snprintf(buf, buflen, "%s ", name);
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+
+	return 0;
+}
+
 /*
  * Get the name of a TX offload flag. Must be kept synchronized with flag
  * definitions in rte_mbuf.h.
@@ -304,6 +400,63 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
 	case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
 	case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
 	case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
+	case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN";
+	case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
+	case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
+	case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
 	default: return NULL;
 	}
 }
+
+/* write the names of the tx ol flags set in mask into buf, space-separated */
+int
+rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
+{
+	const struct flag_mask tx_flags[] = {
+		{ PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
+		{ PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
+		{ PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
+		{ PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
+		{ PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL },
+		{ PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" },
+		{ PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL },
+		{ PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL },
+		{ PKT_TX_IPV4, PKT_TX_IPV4, NULL },
+		{ PKT_TX_IPV6, PKT_TX_IPV6, NULL },
+		{ PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
+		{ PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
+		{ PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
+		{ PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
+		  "PKT_TX_TUNNEL_NONE" },
+		{ PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
+		  "PKT_TX_TUNNEL_NONE" },
+		{ PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
+		  "PKT_TX_TUNNEL_NONE" },
+		{ PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
+		  "PKT_TX_TUNNEL_NONE" },
+	};
+	const char *name;
+	unsigned int i;
+	int ret;
+
+	if (buflen == 0)
+		return -1;
+
+	buf[0] = '\0';
+	for (i = 0; i < RTE_DIM(tx_flags); i++) {
+		if ((mask & tx_flags[i].mask) != tx_flags[i].flag)
+			continue;
+		name = rte_get_tx_ol_flag_name(tx_flags[i].flag);
+		if (name == NULL)
+			name = tx_flags[i].default_name;
+		ret = snprintf(buf, buflen, "%s ", name);
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 101485fb..ead7c6ea 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -44,7 +44,7 @@
  * buffers. The message buffers are stored in a mempool, using the
  * RTE mempool library.
  *
- * This library provide an API to allocate/free packet mbufs, which are
+ * This library provides an API to allocate/free packet mbufs, which are
  * used to carry network packets.
  *
  * To understand the concepts of packet buffers or mbufs, you
@@ -60,6 +60,7 @@
 #include <rte_atomic.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
+#include <rte_mbuf_ptype.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -90,8 +91,25 @@ extern "C" {
 
 #define PKT_RX_RSS_HASH      (1ULL << 1)  /**< RX packet with RSS hash result. */
 #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
-#define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
-#define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_L4_CKSUM_MASK.
+ * This flag was set when the L4 checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_IP_CKSUM_MASK.
+ * This flag was set when the IP checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)
+
 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5)  /**< External IP header checksum error. */
 
 /**
@@ -101,7 +119,35 @@ extern "C" {
  */
 #define PKT_RX_VLAN_STRIPPED (1ULL << 6)
 
-/* hole, some bits can be reused here  */
+/**
+ * Mask of bits used to determine the status of RX IP checksum.
+ * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
+ * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
+ * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
+ * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
+ *   data, but the integrity of the IP header is verified.
+ */
+#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
+
+#define PKT_RX_IP_CKSUM_UNKNOWN 0
+#define PKT_RX_IP_CKSUM_BAD     (1ULL << 4)
+#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
+#define PKT_RX_IP_CKSUM_NONE    ((1ULL << 4) | (1ULL << 7))
+
+/**
+ * Mask of bits used to determine the status of RX L4 checksum.
+ * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
+ * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
+ * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
+ * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
+ *   data, but the integrity of the L4 data is verified.
+ */
+#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
+
+#define PKT_RX_L4_CKSUM_UNKNOWN 0
+#define PKT_RX_L4_CKSUM_BAD     (1ULL << 3)
+#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
+#define PKT_RX_L4_CKSUM_NONE    ((1ULL << 3) | (1ULL << 8))
 
 #define PKT_RX_IEEE1588_PTP  (1ULL << 9)  /**< RX IEEE1588 L2 Ethernet PT Packet. */
 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
@@ -124,11 +170,30 @@ extern "C" {
  */
 #define PKT_RX_QINQ_PKT      PKT_RX_QINQ_STRIPPED
 
+/**
+ * When packets are coalesced by a hardware or virtual driver, this flag
+ * can be set in the RX mbuf, meaning that the m->tso_segsz field is
+ * valid and is set to the segment size of original packets.
+ */
+#define PKT_RX_LRO           (1ULL << 16)
+
 /* add new RX flags here */
 
 /* add new TX flags here */
 
 /**
+ * Bits 45:48 used for the tunnel type.
+ * When doing Tx offload like TSO or checksum, the HW needs to configure the
+ * tunnel type into the HW descriptors.
+ */
+#define PKT_TX_TUNNEL_VXLAN   (0x1ULL << 45)
+#define PKT_TX_TUNNEL_GRE     (0x2ULL << 45)
+#define PKT_TX_TUNNEL_IPIP    (0x3ULL << 45)
+#define PKT_TX_TUNNEL_GENEVE  (0x4ULL << 45)
+/* add new TX TUNNEL type here */
+#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
+
+/**
  * Second VLAN insertion (QinQ) flag.
  */
 #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
@@ -225,500 +290,6 @@ extern "C" {
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
-/*
- * 32 bits are divided into several fields to mark packet types. Note that
- * each field is indexical.
- * - Bit 3:0 is for L2 types.
- * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
- * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
- * - Bit 15:12 is for tunnel types.
- * - Bit 19:16 is for inner L2 types.
- * - Bit 23:20 is for inner L3 types.
- * - Bit 27:24 is for inner L4 types.
- * - Bit 31:28 is reserved.
- *
- * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
- * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
- * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
- *
- * Note that L3 types values are selected for checking IPV4/IPV6 header from
- * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
- * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
- *
- * Note that the packet types of the same packet recognized by different
- * hardware may be different, as different hardware may have different
- * capability of packet type recognition.
- *
- * examples:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=0x29
- * | 'version'=6, 'next header'=0x3A
- * | 'ICMPv6 header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_IP |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_ICMP.
- *
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x2F
- * | 'GRE header'
- * | 'version'=6, 'next header'=0x11
- * | 'UDP header'>
- * will be recognized on i40e hardware as packet type combination of,
- * RTE_PTYPE_L2_ETHER |
- * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_TUNNEL_GRENAT |
- * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- * RTE_PTYPE_INNER_L4_UDP.
- */
-#define RTE_PTYPE_UNKNOWN                   0x00000000
-/**
- * Ethernet packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=[0x0800|0x86DD]>
- */
-#define RTE_PTYPE_L2_ETHER                  0x00000001
-/**
- * Ethernet packet type for time sync.
- *
- * Packet format:
- * <'ether type'=0x88F7>
- */
-#define RTE_PTYPE_L2_ETHER_TIMESYNC         0x00000002
-/**
- * ARP (Address Resolution Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x0806>
- */
-#define RTE_PTYPE_L2_ETHER_ARP              0x00000003
-/**
- * LLDP (Link Layer Discovery Protocol) packet type.
- *
- * Packet format:
- * <'ether type'=0x88CC>
- */
-#define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
-/**
- * NSH (Network Service Header) packet type.
- *
- * Packet format:
- * <'ether type'=0x894F>
- */
-#define RTE_PTYPE_L2_ETHER_NSH              0x00000005
-/**
- * Mask of layer 2 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L2_MASK                   0x0000000f
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * header option.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_L3_IPV4                   0x00000010
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and contains header
- * options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_L3_IPV4_EXT               0x00000030
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and does not contain any
- * extension header.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_L3_IPV6                   0x00000040
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * header options.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00000090
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and contains extension
- * headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   'extension headers'>
- */
-#define RTE_PTYPE_L3_IPV6_EXT               0x000000c0
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for outer packet for tunneling cases, and may or maynot contain
- * extension headers.
- *
- * Packet format:
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   <'extension headers'>>
- */
-#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x000000e0
-/**
- * Mask of layer 3 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L3_MASK                   0x000000f0
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_L4_TCP                    0x00000100
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_L4_UDP                    0x00000200
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, which can be recognized as
- * fragmented. A fragmented packet cannot be recognized as any other L4 types
- * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
- * RTE_PTYPE_L4_NONFRAG).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_L4_FRAG                   0x00000300
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_L4_SCTP                   0x00000400
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_L4_ICMP                   0x00000500
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for outer packet for tunneling cases.
- *
- * It refers to those packets of any IP types, while cannot be recognized as
- * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
- * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_L4_NONFRAG                0x00000600
-/**
- * Mask of layer 4 packet types.
- * It is used for outer packet for tunneling cases.
- */
-#define RTE_PTYPE_L4_MASK                   0x00000f00
-/**
- * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=[4|41]>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[4|41]>
- */
-#define RTE_PTYPE_TUNNEL_IP                 0x00001000
-/**
- * GRE (Generic Routing Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47>
- */
-#define RTE_PTYPE_TUNNEL_GRE                0x00002000
-/**
- * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=4798>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=4798>
- */
-#define RTE_PTYPE_TUNNEL_VXLAN              0x00003000
-/**
- * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
- * packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=47
- * | 'protocol type'=0x6558>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=47
- * | 'protocol type'=0x6558'>
- */
-#define RTE_PTYPE_TUNNEL_NVGRE              0x00004000
-/**
- * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
- *
- * Packet format:
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17
- * | 'destination port'=6081>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17
- * | 'destination port'=6081>
- */
-#define RTE_PTYPE_TUNNEL_GENEVE             0x00005000
-/**
- * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
- * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
- * packet type, if they can not be recognized independently as of hardware
- * capability.
- */
-#define RTE_PTYPE_TUNNEL_GRENAT             0x00006000
-/**
- * Mask of tunneling packet types.
- */
-#define RTE_PTYPE_TUNNEL_MASK               0x0000f000
-/**
- * Ethernet packet type.
- * It is used for inner packet type only.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER            0x00010000
-/**
- * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
- *
- * Packet format (inner only):
- * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
- */
-#define RTE_PTYPE_INNER_L2_ETHER_VLAN       0x00020000
-/**
- * Mask of inner layer 2 packet types.
- */
-#define RTE_PTYPE_INNER_L2_MASK             0x000f0000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and does not contain any header option.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=5>
- */
-#define RTE_PTYPE_INNER_L3_IPV4             0x00100000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and contains header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[6-15], 'options'>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x00200000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and does not contain any extension header.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=0x3B>
- */
-#define RTE_PTYPE_INNER_L3_IPV6             0x00300000
-/**
- * IP (Internet Protocol) version 4 packet type.
- * It is used for inner packet only, and may or maynot contain header options.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'ihl'=[5-15], <'options'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and contains extension headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   'extension headers'>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x00500000
-/**
- * IP (Internet Protocol) version 6 packet type.
- * It is used for inner packet only, and may or maynot contain extension
- * headers.
- *
- * Packet format (inner only):
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
- *   <'extension headers'>>
- */
-#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
-/**
- * Mask of inner layer 3 packet types.
- */
-#define RTE_PTYPE_INNER_L3_MASK             0x00f00000
-/**
- * TCP (Transmission Control Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=6, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=6>
- */
-#define RTE_PTYPE_INNER_L4_TCP              0x01000000
-/**
- * UDP (User Datagram Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=17, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=17>
- */
-#define RTE_PTYPE_INNER_L4_UDP              0x02000000
-/**
- * Fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have layer 4 packet.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'MF'=1>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=44>
- */
-#define RTE_PTYPE_INNER_L4_FRAG             0x03000000
-/**
- * SCTP (Stream Control Transmission Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=132, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=132>
- */
-#define RTE_PTYPE_INNER_L4_SCTP             0x04000000
-/**
- * ICMP (Internet Control Message Protocol) packet type.
- * It is used for inner packet only.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'=1, 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'=1>
- */
-#define RTE_PTYPE_INNER_L4_ICMP             0x05000000
-/**
- * Non-fragmented IP (Internet Protocol) packet type.
- * It is used for inner packet only, and may or maynot have other unknown layer
- * 4 packet types.
- *
- * Packet format (inner only):
- * <'ether type'=0x0800
- * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0>
- * or,
- * <'ether type'=0x86DD
- * | 'version'=6, 'next header'!=[6|17|44|132|1]>
- */
-#define RTE_PTYPE_INNER_L4_NONFRAG          0x06000000
-/**
- * Mask of inner layer 4 packet types.
- */
-#define RTE_PTYPE_INNER_L4_MASK             0x0f000000
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
- * determine if it is an IPV4 packet.
- */
-#define  RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
-
-/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
- */
-#define  RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
-
-/* Check if it is a tunneling packet */
-#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \
-                                                 RTE_PTYPE_INNER_L2_MASK | \
-                                                 RTE_PTYPE_INNER_L3_MASK | \
-                                                 RTE_PTYPE_INNER_L4_MASK))
-
 /** Alignment constraint of mbuf private area. */
 #define RTE_MBUF_PRIV_ALIGN 8
 
@@ -733,6 +304,20 @@ extern "C" {
 const char *rte_get_rx_ol_flag_name(uint64_t mask);
 
 /**
+ * Dump the list of RX offload flags into a buffer
+ *
+ * @param mask
+ *   The mask describing the RX flags.
+ * @param buf
+ *   The output buffer.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   0 on success, (-1) on error.
+ */
+int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
  * Get the name of a TX offload flag
  *
  * @param mask
@@ -745,6 +330,20 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask);
 const char *rte_get_tx_ol_flag_name(uint64_t mask);
 
 /**
+ * Dump the list of TX offload flags into a buffer
+ *
+ * @param mask
+ *   The mask describing the TX flags.
+ * @param buf
+ *   The output buffer.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   0 on success, (-1) on error.
+ */
+int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
  * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
  * splitting it into multiple segments.
  * So, for mbufs that planned to be involved into RX/TX, the recommended
@@ -756,8 +355,11 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask);
 
 /* define a set of marker types that can be used to refer to set points in the
  * mbuf */
+__extension__
 typedef void    *MARKER[0];   /**< generic marker for a point in a structure */
+__extension__
 typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
+__extension__
 typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
                                * with a single assignment */
 
@@ -784,6 +386,7 @@ struct rte_mbuf {
 	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
 	 * config option.
 	 */
+	RTE_STD_C11
 	union {
 		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
 		uint16_t refcnt;              /**< Non-atomically accessed refcnt */
@@ -803,6 +406,7 @@ struct rte_mbuf {
 	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
 	 * vlan is stripped from the data.
 	 */
+	RTE_STD_C11
 	union {
 		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
 		struct {
@@ -824,6 +428,7 @@ struct rte_mbuf {
 	union {
 		uint32_t rss;     /**< RSS hash result if RSS enabled */
 		struct {
+			RTE_STD_C11
 			union {
 				struct {
 					uint16_t hash;
@@ -851,6 +456,7 @@ struct rte_mbuf {
 	/* second cache line - fields only used in slow path or on TX */
 	MARKER cacheline1 __rte_cache_min_aligned;
 
+	RTE_STD_C11
 	union {
 		void *userdata;   /**< Can be used for external metadata */
 		uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
@@ -860,10 +466,15 @@ struct rte_mbuf {
 	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
 
 	/* fields to support TX offloads */
+	RTE_STD_C11
 	union {
 		uint64_t tx_offload;       /**< combined for easy fetch */
+		__extension__
 		struct {
-			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			uint64_t l2_len:7;
+			/**< L2 (MAC) Header Length for non-tunneling pkt.
+			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
+			 */
 			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
 			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
 			uint64_t tso_segsz:16; /**< TCP TSO segment size */
@@ -1157,13 +768,6 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 	return m;
 }
 
-/* compat with older versions */
-__rte_deprecated static inline struct rte_mbuf *
-__rte_mbuf_raw_alloc(struct rte_mempool *mp)
-{
-	return rte_mbuf_raw_alloc(mp);
-}
-
 /**
  * @internal Put mbuf back into its original mempool.
  * The use of that function is reserved for RTE internal needs.
@@ -1385,6 +989,19 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp)
 }
 
 /**
+ * Reset the data_off field of a packet mbuf to its default value.
+ *
+ * The given mbuf must have only one segment, which should be empty.
+ *
+ * @param m
+ *   The packet mbuf whose data_off field has to be reset.
+ */
+static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
+{
+	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+}
+
+/**
  * Reset the fields of a packet mbuf to their default values.
  *
  * The given mbuf must have only one segment.
@@ -1404,8 +1021,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 
 	m->ol_flags = 0;
 	m->packet_type = 0;
-	m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
-			RTE_PKTMBUF_HEADROOM : m->buf_len;
+	rte_pktmbuf_reset_headroom(m);
 
 	m->data_len = 0;
 	__rte_mbuf_sanity_check(m, 1);
@@ -1569,7 +1185,7 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	m->buf_addr = (char *)m + mbuf_size;
 	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
 	m->buf_len = (uint16_t)buf_len;
-	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
+	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
 	m->ol_flags = 0;
 
@@ -1958,6 +1574,41 @@ static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
 }
 
 /**
+ * @internal used by rte_pktmbuf_read().
+ */
+const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
+	uint32_t len, void *buf);
+
+/**
+ * Read len data bytes from an mbuf at the specified offset.
+ *
+ * If the data is contiguous, return a pointer into the mbuf data, else
+ * copy the data into the buffer provided by the user and return a
+ * pointer to that buffer.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset of the data in the mbuf.
+ * @param len
+ *   The amount of bytes to read.
+ * @param buf
+ *   The buffer where data is copied if it is not contiguous in mbuf
+ *   data. Its length should be at least equal to the len parameter.
+ * @return
+ *   The pointer to the data, either in the mbuf if it is contiguous,
+ *   or in the user buffer. If mbuf is too small, NULL is returned.
+ */
+static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
+	uint32_t off, uint32_t len, void *buf)
+{
+	if (likely(off + len <= rte_pktmbuf_data_len(m)))
+		return rte_pktmbuf_mtod_offset(m, char *, off);
+	else
+		return __rte_pktmbuf_read(m, off, len, buf);
+}
+
+/**
  * Chain an mbuf to another, thereby creating a segmented packet.
  *
  * Note: The implementation will do a linear walk over the segments to find
@@ -1996,7 +1647,7 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
 }
 
 /**
- * Dump an mbuf structure to the console.
+ * Dump an mbuf structure to a file.
  *
  * Dump all fields for the given packet mbuf and all its associated
  * segments (in the case of a chained buffer).
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.c b/lib/librte_mbuf/rte_mbuf_ptype.c
new file mode 100644
index 00000000..e5c4fae3
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.c
@@ -0,0 +1,227 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+
+/* get the name of the l2 packet type */
+const char *rte_get_ptype_l2_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L2_MASK) {
+	case RTE_PTYPE_L2_ETHER: return "L2_ETHER";
+	case RTE_PTYPE_L2_ETHER_TIMESYNC: return "L2_ETHER_TIMESYNC";
+	case RTE_PTYPE_L2_ETHER_ARP: return "L2_ETHER_ARP";
+	case RTE_PTYPE_L2_ETHER_LLDP: return "L2_ETHER_LLDP";
+	case RTE_PTYPE_L2_ETHER_NSH: return "L2_ETHER_NSH";
+	case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN";
+	case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ";
+	default: return "L2_UNKNOWN";
+	}
+}
+
+/* get the name of the l3 packet type */
+const char *rte_get_ptype_l3_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L3_MASK) {
+	case RTE_PTYPE_L3_IPV4: return "L3_IPV4";
+	case RTE_PTYPE_L3_IPV4_EXT: return "L3_IPV4_EXT";
+	case RTE_PTYPE_L3_IPV6: return "L3_IPV6";
+	case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: return "L3_IPV4_EXT_UNKNOWN";
+	case RTE_PTYPE_L3_IPV6_EXT: return "L3_IPV6_EXT";
+	case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: return "L3_IPV6_EXT_UNKNOWN";
+	default: return "L3_UNKNOWN";
+	}
+}
+
+/* get the name of the l4 packet type */
+const char *rte_get_ptype_l4_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_L4_MASK) {
+	case RTE_PTYPE_L4_TCP: return "L4_TCP";
+	case RTE_PTYPE_L4_UDP: return "L4_UDP";
+	case RTE_PTYPE_L4_FRAG: return "L4_FRAG";
+	case RTE_PTYPE_L4_SCTP: return "L4_SCTP";
+	case RTE_PTYPE_L4_ICMP: return "L4_ICMP";
+	case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG";
+	default: return "L4_UNKNOWN";
+	}
+}
+
+/* get the name of the tunnel packet type */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_TUNNEL_MASK) {
+	case RTE_PTYPE_TUNNEL_IP: return "TUNNEL_IP";
+	case RTE_PTYPE_TUNNEL_GRE: return "TUNNEL_GRE";
+	case RTE_PTYPE_TUNNEL_VXLAN: return "TUNNEL_VXLAN";
+	case RTE_PTYPE_TUNNEL_NVGRE: return "TUNNEL_NVGRE";
+	case RTE_PTYPE_TUNNEL_GENEVE: return "TUNNEL_GENEVE";
+	case RTE_PTYPE_TUNNEL_GRENAT: return "TUNNEL_GRENAT";
+	default: return "TUNNEL_UNKNOWN";
+	}
+}
+
+/* get the name of the inner_l2 packet type */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_INNER_L2_MASK) {
+	case RTE_PTYPE_INNER_L2_ETHER: return "INNER_L2_ETHER";
+	case RTE_PTYPE_INNER_L2_ETHER_VLAN: return "INNER_L2_ETHER_VLAN";
+	case RTE_PTYPE_INNER_L2_ETHER_QINQ: return "INNER_L2_ETHER_QINQ";
+	default: return "INNER_L2_UNKNOWN";
+	}
+}
+
+/* get the name of the inner_l3 packet type */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_INNER_L3_MASK) {
+	case RTE_PTYPE_INNER_L3_IPV4: return "INNER_L3_IPV4";
+	case RTE_PTYPE_INNER_L3_IPV4_EXT: return "INNER_L3_IPV4_EXT";
+	case RTE_PTYPE_INNER_L3_IPV6: return "INNER_L3_IPV6";
+	case RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN:
+		return "INNER_L3_IPV4_EXT_UNKNOWN";
+	case RTE_PTYPE_INNER_L3_IPV6_EXT: return "INNER_L3_IPV6_EXT";
+	case RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN:
+		return "INNER_L3_IPV6_EXT_UNKNOWN";
+	default: return "INNER_L3_UNKNOWN";
+	}
+}
+
+/* get the name of the inner_l4 packet type */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype)
+{
+	switch (ptype & RTE_PTYPE_INNER_L4_MASK) {
+	case RTE_PTYPE_INNER_L4_TCP: return "INNER_L4_TCP";
+	case RTE_PTYPE_INNER_L4_UDP: return "INNER_L4_UDP";
+	case RTE_PTYPE_INNER_L4_FRAG: return "INNER_L4_FRAG";
+	case RTE_PTYPE_INNER_L4_SCTP: return "INNER_L4_SCTP";
+	case RTE_PTYPE_INNER_L4_ICMP: return "INNER_L4_ICMP";
+	case RTE_PTYPE_INNER_L4_NONFRAG: return "INNER_L4_NONFRAG";
+	default: return "INNER_L4_UNKNOWN";
+	}
+}
+
+/* write the packet type name into the buffer */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen)
+{
+	int ret;
+
+	if (buflen == 0)
+		return -1;
+
+	buf[0] = '\0';
+	if ((ptype & RTE_PTYPE_ALL_MASK) == RTE_PTYPE_UNKNOWN) {
+		ret = snprintf(buf, buflen, "UNKNOWN");
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		return 0;
+	}
+
+	if ((ptype & RTE_PTYPE_L2_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_l2_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_L3_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_l3_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_L4_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_l4_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_TUNNEL_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_tunnel_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_INNER_L2_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_inner_l2_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_INNER_L3_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_inner_l3_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+	if ((ptype & RTE_PTYPE_INNER_L4_MASK) != 0) {
+		ret = snprintf(buf, buflen, "%s ",
+			rte_get_ptype_inner_l4_name(ptype));
+		if (ret < 0)
+			return -1;
+		if ((size_t)ret >= buflen)
+			return -1;
+		buf += ret;
+		buflen -= ret;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
new file mode 100644
index 00000000..ff6de9d1
--- /dev/null
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -0,0 +1,668 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation.
+ *   Copyright 2014-2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MBUF_PTYPE_H_
+#define _RTE_MBUF_PTYPE_H_
+
+/**
+ * @file
+ * RTE Mbuf Packet Types
+ *
+ * This file contains declarations for features related to mbuf packet
+ * types. The packet type gives information about the data carried by the
+ * mbuf, and is stored in the mbuf in a 32 bits field.
+ *
+ * The 32 bits are divided into several fields to mark packet types. Note that
+ * each field is indexical.
+ * - Bit 3:0 is for L2 types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types.
+ * - Bit 15:12 is for tunnel types.
+ * - Bit 19:16 is for inner L2 types.
+ * - Bit 23:20 is for inner L3 types.
+ * - Bit 27:24 is for inner L4 types.
+ * - Bit 31:28 is reserved.
+ *
+ * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
+ *
+ * Note that L3 types values are selected for checking IPV4/IPV6 header from
+ * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and
+ * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values.
+ *
+ * Note that the packet types of the same packet recognized by different
+ * hardware may be different, as different hardware may have different
+ * capability of packet type recognition.
+ *
+ * examples:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=0x29
+ * | 'version'=6, 'next header'=0x3A
+ * | 'ICMPv6 header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_IP |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_ICMP.
+ *
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x2F
+ * | 'GRE header'
+ * | 'version'=6, 'next header'=0x11
+ * | 'UDP header'>
+ * will be recognized on i40e hardware as packet type combination of,
+ * RTE_PTYPE_L2_ETHER |
+ * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_TUNNEL_GRENAT |
+ * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ * RTE_PTYPE_INNER_L4_UDP.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * No packet type information.
+ */
+#define RTE_PTYPE_UNKNOWN                   0x00000000
+/**
+ * Ethernet packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=[0x0800|0x86DD]>
+ */
+#define RTE_PTYPE_L2_ETHER                  0x00000001
+/**
+ * Ethernet packet type for time sync.
+ *
+ * Packet format:
+ * <'ether type'=0x88F7>
+ */
+#define RTE_PTYPE_L2_ETHER_TIMESYNC         0x00000002
+/**
+ * ARP (Address Resolution Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0806>
+ */
+#define RTE_PTYPE_L2_ETHER_ARP              0x00000003
+/**
+ * LLDP (Link Layer Discovery Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x88CC>
+ */
+#define RTE_PTYPE_L2_ETHER_LLDP             0x00000004
+/**
+ * NSH (Network Service Header) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x894F>
+ */
+#define RTE_PTYPE_L2_ETHER_NSH              0x00000005
+/**
+ * VLAN packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8100]>
+ */
+#define RTE_PTYPE_L2_ETHER_VLAN             0x00000006
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_L2_ETHER_QINQ             0x00000007
+/**
+ * Mask of layer 2 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L2_MASK                   0x0000000f
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * header option.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_L3_IPV4                   0x00000010
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and contains header
+ * options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT               0x00000030
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and does not contain any
+ * extension header.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_L3_IPV6                   0x00000040
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * header options.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00000090
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and contains extension
+ * headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT               0x000000c0
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for outer packet for tunneling cases, and may or may not contain
+ * extension headers.
+ *
+ * Packet format:
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x000000e0
+/**
+ * Mask of layer 3 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L3_MASK                   0x000000f0
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_L4_TCP                    0x00000100
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_L4_UDP                    0x00000200
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which can be recognized as
+ * fragmented. A fragmented packet cannot be recognized as any other L4 types
+ * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP,
+ * RTE_PTYPE_L4_NONFRAG).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_L4_FRAG                   0x00000300
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_L4_SCTP                   0x00000400
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_L4_ICMP                   0x00000500
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for outer packet for tunneling cases.
+ *
+ * It refers to those packets of any IP types, which cannot be recognized as
+ * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP,
+ * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP).
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_L4_NONFRAG                0x00000600
+/**
+ * Mask of layer 4 packet types.
+ * It is used for outer packet for tunneling cases.
+ */
+#define RTE_PTYPE_L4_MASK                   0x00000f00
+/**
+ * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=[4|41]>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[4|41]>
+ */
+#define RTE_PTYPE_TUNNEL_IP                 0x00001000
+/**
+ * GRE (Generic Routing Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47>
+ */
+#define RTE_PTYPE_TUNNEL_GRE                0x00002000
+/**
+ * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=4789>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=4789>
+ */
+#define RTE_PTYPE_TUNNEL_VXLAN              0x00003000
+/**
+ * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling
+ * packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=47
+ * | 'protocol type'=0x6558>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=47
+ * | 'protocol type'=0x6558>
+ */
+#define RTE_PTYPE_TUNNEL_NVGRE              0x00004000
+/**
+ * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17
+ * | 'destination port'=6081>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17
+ * | 'destination port'=6081>
+ */
+#define RTE_PTYPE_TUNNEL_GENEVE             0x00005000
+/**
+ * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area
+ * Network) or GRE (Generic Routing Encapsulation) could be recognized as this
+ * packet type, if they can not be recognized independently as of hardware
+ * capability.
+ */
+#define RTE_PTYPE_TUNNEL_GRENAT             0x00006000
+/**
+ * Mask of tunneling packet types.
+ */
+#define RTE_PTYPE_TUNNEL_MASK               0x0000f000
+/**
+ * Ethernet packet type.
+ * It is used for inner packet type only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER            0x00010000
+/**
+ * Ethernet packet type with VLAN (Virtual Local Area Network) tag.
+ *
+ * Packet format (inner only):
+ * <'ether type'=[0x800|0x86DD], vlan=[1-4095]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_VLAN       0x00020000
+/**
+ * QinQ packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x88A8]>
+ */
+#define RTE_PTYPE_INNER_L2_ETHER_QINQ       0x00030000
+/**
+ * Mask of inner layer 2 packet types.
+ */
+#define RTE_PTYPE_INNER_L2_MASK             0x000f0000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and does not contain any header option.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=5>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4             0x00100000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and contains header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[6-15], 'options'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x00200000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and does not contain any extension header.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=0x3B>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6             0x00300000
+/**
+ * IP (Internet Protocol) version 4 packet type.
+ * It is used for inner packet only, and may or may not contain header options.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'ihl'=[5-15], <'options'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and contains extension headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   'extension headers'>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x00500000
+/**
+ * IP (Internet Protocol) version 6 packet type.
+ * It is used for inner packet only, and may or may not contain extension
+ * headers.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87],
+ *   <'extension headers'>>
+ */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+/**
+ * Mask of inner layer 3 packet types.
+ */
+#define RTE_PTYPE_INNER_L3_MASK             0x00f00000
+/**
+ * TCP (Transmission Control Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=6, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=6>
+ */
+#define RTE_PTYPE_INNER_L4_TCP              0x01000000
+/**
+ * UDP (User Datagram Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=17, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=17>
+ */
+#define RTE_PTYPE_INNER_L4_UDP              0x02000000
+/**
+ * Fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have layer 4 packet.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'MF'=1>
+ * or,
+ * <'ether type'=0x0800
+ * | 'version'=4, 'frag_offset'!=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=44>
+ */
+#define RTE_PTYPE_INNER_L4_FRAG             0x03000000
+/**
+ * SCTP (Stream Control Transmission Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=132, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=132>
+ */
+#define RTE_PTYPE_INNER_L4_SCTP             0x04000000
+/**
+ * ICMP (Internet Control Message Protocol) packet type.
+ * It is used for inner packet only.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=1, 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'=1>
+ */
+#define RTE_PTYPE_INNER_L4_ICMP             0x05000000
+/**
+ * Non-fragmented IP (Internet Protocol) packet type.
+ * It is used for inner packet only, and may or may not have other unknown
+ * layer 4 packet types.
+ *
+ * Packet format (inner only):
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0>
+ * or,
+ * <'ether type'=0x86DD
+ * | 'version'=6, 'next header'!=[6|17|44|132|1]>
+ */
+#define RTE_PTYPE_INNER_L4_NONFRAG          0x06000000
+/**
+ * Mask of inner layer 4 packet types.
+ */
+#define RTE_PTYPE_INNER_L4_MASK             0x0f000000
+/**
+ * All valid layer masks.
+ */
+#define RTE_PTYPE_ALL_MASK                  0x0fffffff
+
+/**
+ * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
+ * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can
+ * determine if it is an IPV4 packet.
+ */
+#define  RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
+
+/**
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPV6 packet.
+ */
+#define  RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) &				\
+	(RTE_PTYPE_TUNNEL_MASK |					\
+		RTE_PTYPE_INNER_L2_MASK |				\
+		RTE_PTYPE_INNER_L3_MASK |				\
+		RTE_PTYPE_INNER_L4_MASK))
+
+/**
+ * Get the name of the l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_l4_name(uint32_t ptype);
+
+/**
+ * Get the name of the tunnel packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_tunnel_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l2 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l2_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l3 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l3_name(uint32_t ptype);
+
+/**
+ * Get the name of the inner_l4 packet type
+ *
+ * @param ptype
+ *   The packet type value.
+ * @return
+ *   A non-null string describing the packet type.
+ */
+const char *rte_get_ptype_inner_l4_name(uint32_t ptype);
+
+/**
+ * Write the packet type name into the buffer
+ *
+ * @param ptype
+ *   The packet type value.
+ * @param buf
+ *   The buffer where the string is written.
+ * @param buflen
+ *   The length of the buffer.
+ * @return
+ *   - 0 on success
+ *   - (-1) if the buffer is too small
+ */
+int rte_get_ptype_name(uint32_t ptype, char *buf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MBUF_PTYPE_H_ */
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index e10f6bdc..6e2ea845 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -18,3 +18,20 @@ DPDK_2.1 {
 	rte_pktmbuf_pool_create;
 
 } DPDK_2.0;
+
+DPDK_16.11 {
+	global:
+
+	__rte_pktmbuf_read;
+	rte_get_ptype_inner_l2_name;
+	rte_get_ptype_inner_l3_name;
+	rte_get_ptype_inner_l4_name;
+	rte_get_ptype_l2_name;
+	rte_get_ptype_l3_name;
+	rte_get_ptype_l4_name;
+	rte_get_ptype_name;
+	rte_get_ptype_tunnel_name;
+	rte_get_rx_ol_flag_list;
+	rte_get_tx_ol_flag_list;
+
+} DPDK_2.1;
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index ad7c470e..aa513b97 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -55,7 +55,6 @@
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
 #include <rte_branch_prediction.h>
-#include <rte_ring.h>
 #include <rte_errno.h>
 #include <rte_string_fns.h>
 #include <rte_spinlock.h>
@@ -911,9 +910,8 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
 /*
  * Create the mempool over already allocated chunk of memory.
  * That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to original behaviour
- * and allocate space for mempool and it's elements as one big chunk of
- * physically continuos memory.
+ * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create()
+ * behavior.
  */
 struct rte_mempool *
 rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 059ad9e5..440f3b1b 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -75,6 +75,7 @@
 #include <rte_branch_prediction.h>
 #include <rte_ring.h>
 #include <rte_memcpy.h>
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -216,6 +217,7 @@ struct rte_mempool {
 	 * RTE_MEMPOOL_NAMESIZE next time the ABI changes
 	 */
 	char name[RTE_MEMZONE_NAMESIZE]; /**< Name of mempool. */
+	RTE_STD_C11
 	union {
 		void *pool_data;         /**< Ring or pool to store objects. */
 		uint64_t pool_id;        /**< External mempool identifier. */
@@ -587,10 +589,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *);
 /**
  * Create a new mempool named *name* in memory.
  *
- * This function uses ``memzone_reserve()`` to allocate memory. The
+ * This function uses ``rte_memzone_reserve()`` to allocate memory. The
  * pool contains n elements of elt_size. Its size is set to n.
- * All elements of the mempool are allocated together with the mempool header,
- * in one physically continuous chunk of memory.
  *
  * @param name
  *   The name of the mempool.
@@ -746,7 +746,7 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
  *
  * The mempool is allocated and initialized, but it is not populated: no
  * memory is allocated for the mempool elements. The user has to call
- * rte_mempool_populate_*() or to add memory chunks to the pool. Once
+ * rte_mempool_populate_*() to add memory chunks to the pool. Once
  * populated, the user may also want to initialize each object with
  * rte_mempool_obj_iter().
  *
@@ -798,6 +798,10 @@ rte_mempool_free(struct rte_mempool *mp);
  * Add a virtually and physically contiguous memory chunk in the pool
  * where objects can be instanciated.
  *
+ * If the given physical address is unknown (paddr = RTE_BAD_PHYS_ADDR),
+ * the chunk doesn't need to be physically contiguous (only virtually),
+ * and allocated objects may span two pages.
+ *
  * @param mp
  *   A pointer to the mempool structure.
  * @param vaddr
@@ -946,7 +950,7 @@ uint32_t rte_mempool_mem_iter(struct rte_mempool *mp,
 	rte_mempool_mem_cb_t *mem_cb, void *mem_cb_arg);
 
 /**
- * Dump the status of the mempool to the console.
+ * Dump the status of the mempool to a file.
  *
  * @param f
  *   A pointer to a file for output
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
index 2cd8d814..2ab71849 100644
--- a/lib/librte_meter/rte_meter.h
+++ b/lib/librte_meter/rte_meter.h
@@ -232,13 +232,15 @@ rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m,
 	n_periods = time_diff / m->cir_period;
 	m->time += n_periods * m->cir_period;
 
+	/* Put the tokens overflowing from tc into te bucket */
 	tc = m->tc + n_periods * m->cir_bytes_per_period;
-	if (tc > m->cbs)
+	te = m->te;
+	if (tc > m->cbs) {
+		te += (tc - m->cbs);
+		if (te > m->ebs)
+			te = m->ebs;
 		tc = m->cbs;
-
-	te = m->te + n_periods * m->cir_bytes_per_period;
-	if (te > m->ebs)
-		te = m->ebs;
+	}
 
 	/* Color logic */
 	if (tc >= pkt_len) {
@@ -271,13 +273,15 @@ rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m,
 	n_periods = time_diff / m->cir_period;
 	m->time += n_periods * m->cir_period;
 
+	/* Put the tokens overflowing from tc into te bucket */
 	tc = m->tc + n_periods * m->cir_bytes_per_period;
-	if (tc > m->cbs)
+	te = m->te;
+	if (tc > m->cbs) {
+		te += (tc - m->cbs);
+		if (te > m->ebs)
+			te = m->ebs;
 		tc = m->cbs;
-
-	te = m->te + n_periods * m->cir_bytes_per_period;
-	if (te > m->ebs)
-		te = m->ebs;
+	}
 
 	/* Color logic */
 	if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) {
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ad2e482d..20cf6644 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -31,10 +31,20 @@
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
+LIB = librte_net.a
+
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 
+EXPORT_MAP := rte_net_version.map
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
+
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
 
+DEPDIRS-$(CONFIG_RTE_LIBRTE_NET) += lib/librte_eal lib/librte_mbuf
 
-include $(RTE_SDK)/mk/rte.install.mk
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_net/rte_ether.h
index 1d62d8e5..ff3d0654 100644
--- a/lib/librte_ether/rte_ether.h
+++ b/lib/librte_net/rte_ether.h
@@ -84,7 +84,7 @@ extern "C" {
  * See http://standards.ieee.org/regauth/groupmac/tutorial.html
  */
 struct ether_addr {
-	uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */
+	uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Addr bytes in tx order */
 } __attribute__((__packed__));
 
 #define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */
@@ -224,7 +224,7 @@ static inline int is_local_admin_ether_addr(const struct ether_addr *ea)
  */
 static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
 {
-	return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea));
+	return is_unicast_ether_addr(ea) && (!is_zero_ether_addr(ea));
 }
 
 /**
@@ -236,7 +236,7 @@ static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea)
 static inline void eth_random_addr(uint8_t *addr)
 {
 	uint64_t rand = rte_rand();
-	uint8_t *p = (uint8_t*)&rand;
+	uint8_t *p = (uint8_t *)&rand;
 
 	rte_memcpy(addr, p, ETHER_ADDR_LEN);
 	addr[0] &= ~ETHER_GROUP_ADDR;       /* clear multicast bit */
@@ -329,6 +329,7 @@ struct vxlan_hdr {
 #define ETHER_TYPE_ARP  0x0806 /**< Arp Protocol. */
 #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */
 #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */
+#define ETHER_TYPE_QINQ 0x88A8 /**< IEEE 802.1ad QinQ tagging. */
 #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */
 #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 #define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_net/rte_gre.h
index e1b967b8..46568ff5 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_net/rte_gre.h
@@ -1,8 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
+ *   Copyright 2016 6WIND S.A.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -31,32 +30,42 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _VIRTIO_NET_USER_H
-#define _VIRTIO_NET_USER_H
+#ifndef _RTE_GRE_H_
+#define _RTE_GRE_H_
 
-#include "vhost-net.h"
-#include "vhost-net-user.h"
+#include <stdint.h>
+#include <rte_byteorder.h>
 
-#define VHOST_USER_PROTOCOL_F_MQ	0
-#define VHOST_USER_PROTOCOL_F_LOG_SHMFD	1
-#define VHOST_USER_PROTOCOL_F_RARP	2
-
-#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
-					 (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
-					 (1ULL << VHOST_USER_PROTOCOL_F_RARP))
-
-int user_set_mem_table(int, struct VhostUserMsg *);
-
-void user_set_vring_call(int, struct VhostUserMsg *);
-
-void user_set_vring_kick(int, struct VhostUserMsg *);
-
-void user_set_protocol_features(int vid, uint64_t protocol_features);
-int user_set_log_base(int vid, struct VhostUserMsg *);
-int user_send_rarp(int vid, struct VhostUserMsg *);
-
-int user_get_vring_base(int, struct vhost_vring_state *);
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-int user_set_vring_enable(int vid, struct vhost_vring_state *state);
+/**
+ * GRE Header
+ */
+struct gre_hdr {
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+	uint16_t res2:4; /**< Reserved */
+	uint16_t s:1;    /**< Sequence Number Present bit */
+	uint16_t k:1;    /**< Key Present bit */
+	uint16_t res1:1; /**< Reserved */
+	uint16_t c:1;    /**< Checksum Present bit */
+	uint16_t ver:3;  /**< Version Number */
+	uint16_t res3:5; /**< Reserved */
+#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	uint16_t c:1;    /**< Checksum Present bit */
+	uint16_t res1:1; /**< Reserved */
+	uint16_t k:1;    /**< Key Present bit */
+	uint16_t s:1;    /**< Sequence Number Present bit */
+	uint16_t res2:4; /**< Reserved */
+	uint16_t res3:5; /**< Reserved */
+	uint16_t ver:3;  /**< Version Number */
+#endif
+	uint16_t proto;  /**< Protocol Type */
+} __attribute__((__packed__));
 
+#ifdef __cplusplus
+}
 #endif
+
+#endif /* _RTE_GRE_H_ */
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 5b7554ab..4491b86e 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -230,6 +230,77 @@ rte_raw_cksum(const void *buf, size_t len)
 }
 
 /**
+ * Compute the raw (non complemented) checksum of a packet.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param off
+ *   The offset in bytes to start the checksum.
+ * @param len
+ *   The length in bytes of the data to checksum.
+ * @param cksum
+ *   A pointer to the checksum, filled on success.
+ * @return
+ *   0 on success, -1 on error (bad length or offset).
+ */
+static inline int
+rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len,
+	uint16_t *cksum)
+{
+	const struct rte_mbuf *seg;
+	const char *buf;
+	uint32_t sum, tmp;
+	uint32_t seglen, done;
+
+	/* easy case: all data in the first segment */
+	if (off + len <= rte_pktmbuf_data_len(m)) {
+		*cksum = rte_raw_cksum(rte_pktmbuf_mtod_offset(m,
+				const char *, off), len);
+		return 0;
+	}
+
+	if (unlikely(off + len > rte_pktmbuf_pkt_len(m)))
+		return -1; /* invalid params, return a dummy value */
+
+	/* else browse the segment to find offset */
+	seglen = 0;
+	for (seg = m; seg != NULL; seg = seg->next) {
+		seglen = rte_pktmbuf_data_len(seg);
+		if (off < seglen)
+			break;
+		off -= seglen;
+	}
+	seglen -= off;
+	buf = rte_pktmbuf_mtod_offset(seg, const char *, off);
+	if (seglen >= len) {
+		/* all in one segment */
+		*cksum = rte_raw_cksum(buf, len);
+		return 0;
+	}
+
+	/* hard case: process checksum of several segments */
+	sum = 0;
+	done = 0;
+	for (;;) {
+		tmp = __rte_raw_cksum(buf, seglen, 0);
+		if (done & 1)
+			tmp = rte_bswap16(tmp);
+		sum += tmp;
+		done += seglen;
+		if (done == len)
+			break;
+		seg = seg->next;
+		buf = rte_pktmbuf_mtod(seg, const char *);
+		seglen = rte_pktmbuf_data_len(seg);
+		if (seglen > len - done)
+			seglen = len - done;
+	}
+
+	*cksum = __rte_raw_cksum_reduce(sum);
+	return 0;
+}
+
+/**
  * Process the IPv4 checksum of an IPv4 header.
  *
  * The checksum field must be set to 0 by the caller.
diff --git a/lib/librte_net/rte_net.c b/lib/librte_net/rte_net.c
new file mode 100644
index 00000000..a8c7aff9
--- /dev/null
+++ b/lib/librte_net/rte_net.c
@@ -0,0 +1,517 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_mbuf_ptype.h>
+#include <rte_byteorder.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_sctp.h>
+#include <rte_gre.h>
+#include <rte_net.h>
+
+/* get l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_l3_ip6(uint8_t ip6_proto)
+{
+	static const uint32_t ip6_ext_proto_map[256] = {
+		[IPPROTO_HOPOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_ROUTING] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_FRAGMENT] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_ESP] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_AH] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+		[IPPROTO_DSTOPTS] = RTE_PTYPE_L3_IPV6_EXT - RTE_PTYPE_L3_IPV6,
+	};
+
+	return RTE_PTYPE_L3_IPV6 + ip6_ext_proto_map[ip6_proto];
+}
+
+/* get l3 packet type from ip version and header length */
+static uint32_t
+ptype_l3_ip(uint8_t ipv_ihl)
+{
+	static const uint32_t ptype_l3_ip_proto_map[256] = {
+		[0x45] = RTE_PTYPE_L3_IPV4,
+		[0x46] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x47] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x48] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x49] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4A] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4B] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4C] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4D] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4E] = RTE_PTYPE_L3_IPV4_EXT,
+		[0x4F] = RTE_PTYPE_L3_IPV4_EXT,
+	};
+
+	return ptype_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get l4 packet type from proto */
+static uint32_t
+ptype_l4(uint8_t proto)
+{
+	static const uint32_t ptype_l4_proto[256] = {
+		[IPPROTO_UDP] = RTE_PTYPE_L4_UDP,
+		[IPPROTO_TCP] = RTE_PTYPE_L4_TCP,
+		[IPPROTO_SCTP] = RTE_PTYPE_L4_SCTP,
+	};
+
+	return ptype_l4_proto[proto];
+}
+
+/* get inner l3 packet type from ip6 next protocol */
+static uint32_t
+ptype_inner_l3_ip6(uint8_t ip6_proto)
+{
+	static const uint32_t ptype_inner_ip6_ext_proto_map[256] = {
+		[IPPROTO_HOPOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_ROUTING] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_FRAGMENT] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_ESP] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_AH] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+		[IPPROTO_DSTOPTS] = RTE_PTYPE_INNER_L3_IPV6_EXT -
+			RTE_PTYPE_INNER_L3_IPV6,
+	};
+
+	return RTE_PTYPE_INNER_L3_IPV6 +
+		ptype_inner_ip6_ext_proto_map[ip6_proto];
+}
+
+/* get inner l3 packet type from ip version and header length */
+static uint32_t
+ptype_inner_l3_ip(uint8_t ipv_ihl)
+{
+	static const uint32_t ptype_inner_l3_ip_proto_map[256] = {
+		[0x45] = RTE_PTYPE_INNER_L3_IPV4,
+		[0x46] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x47] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x48] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x49] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4A] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4B] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4C] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4D] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4E] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+		[0x4F] = RTE_PTYPE_INNER_L3_IPV4_EXT,
+	};
+
+	return ptype_inner_l3_ip_proto_map[ipv_ihl];
+}
+
+/* get inner l4 packet type from proto */
+static uint32_t
+ptype_inner_l4(uint8_t proto)
+{
+	static const uint32_t ptype_inner_l4_proto[256] = {
+		[IPPROTO_UDP] = RTE_PTYPE_INNER_L4_UDP,
+		[IPPROTO_TCP] = RTE_PTYPE_INNER_L4_TCP,
+		[IPPROTO_SCTP] = RTE_PTYPE_INNER_L4_SCTP,
+	};
+
+	return ptype_inner_l4_proto[proto];
+}
+
+/* get tunnel ptype if any, update proto and off; proto is 0 if none found. */
+static uint32_t
+ptype_tunnel(uint16_t *proto, const struct rte_mbuf *m,
+	uint32_t *off)
+{
+	switch (*proto) {
+	case IPPROTO_GRE: {
+		static const uint8_t opt_len[16] = {
+			[0x0] = 4,
+			[0x1] = 8,
+			[0x2] = 8,
+			[0x8] = 8,
+			[0x3] = 12,
+			[0x9] = 12,
+			[0xa] = 12,
+			[0xb] = 16,
+		};
+		const struct gre_hdr *gh;
+		struct gre_hdr gh_copy;
+		uint16_t flags;
+
+		gh = rte_pktmbuf_read(m, *off, sizeof(*gh), &gh_copy);
+		if (unlikely(gh == NULL))
+			break;
+
+		flags = rte_be_to_cpu_16(*(const uint16_t *)gh);
+		flags >>= 12; /* keep the 4 top bits, used as opt_len index */
+		if (opt_len[flags] == 0)
+			break;
+		*off += opt_len[flags];
+		*proto = gh->proto;
+		if (*proto == rte_cpu_to_be_16(ETHER_TYPE_TEB))
+			return RTE_PTYPE_TUNNEL_NVGRE;
+		return RTE_PTYPE_TUNNEL_GRE;
+	}
+	case IPPROTO_IPIP:
+		*proto = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+		return RTE_PTYPE_TUNNEL_IP;
+	case IPPROTO_IPV6:
+		*proto = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+		return RTE_PTYPE_TUNNEL_IP; /* IP is also valid for IPv6 */
+	default:
+		break;
+	}
+	*proto = 0; /* no tunnel: not an ethertype, must not match inner L2/L3 */
+	return 0;
+}
+
+/* get the ipv4 header length */
+static uint8_t
+ip4_hlen(const struct ipv4_hdr *hdr)
+{
+	return (hdr->version_ihl & 0xf) * 4;
+}
+
+/* parse ipv6 extended headers, update offset and return next proto */
+static uint16_t
+skip_ip6_ext(uint16_t proto, const struct rte_mbuf *m, uint32_t *off,
+	int *frag)
+{
+	struct ext_hdr {
+		uint8_t next_hdr;
+		uint8_t len;
+	};
+	const struct ext_hdr *xh;
+	struct ext_hdr xh_copy;
+	unsigned int i;
+
+	*frag = 0;
+
+#define MAX_EXT_HDRS 5
+	for (i = 0; i < MAX_EXT_HDRS; i++) {
+		switch (proto) {
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+			xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+				&xh_copy);
+			if (xh == NULL)
+				return 0;
+			*off += (xh->len + 1) * 8;
+			proto = xh->next_hdr;
+			break;
+		case IPPROTO_FRAGMENT:
+			xh = rte_pktmbuf_read(m, *off, sizeof(*xh),
+				&xh_copy);
+			if (xh == NULL)
+				return 0;
+			*off += 8;
+			proto = xh->next_hdr;
+			*frag = 1;
+			return proto; /* this is always the last ext hdr */
+		case IPPROTO_NONE:
+			return 0;
+		default:
+			return proto;
+		}
+	}
+	return 0;
+}
+
+/* parse mbuf data to get packet type */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+	struct rte_net_hdr_lens *hdr_lens, uint32_t layers)
+{
+	struct rte_net_hdr_lens local_hdr_lens;
+	const struct ether_hdr *eh;
+	struct ether_hdr eh_copy;
+	uint32_t pkt_type = RTE_PTYPE_L2_ETHER;
+	uint32_t off = 0;
+	uint16_t proto;
+
+	if (hdr_lens == NULL)
+		hdr_lens = &local_hdr_lens;
+
+	eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+	if (unlikely(eh == NULL))
+		return 0;
+	proto = eh->ether_type;
+	off = sizeof(*eh);
+	hdr_lens->l2_len = off;
+
+	if ((layers & RTE_PTYPE_L2_MASK) == 0)
+		return 0;
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+		goto l3; /* fast path if packet is IPv4 */
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+		const struct vlan_hdr *vh;
+		struct vlan_hdr vh_copy;
+
+		pkt_type = RTE_PTYPE_L2_ETHER_VLAN;
+		vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+		if (unlikely(vh == NULL))
+			return pkt_type;
+		off += sizeof(*vh);
+		hdr_lens->l2_len += sizeof(*vh);
+		proto = vh->eth_proto;
+	} else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+		const struct vlan_hdr *vh;
+		struct vlan_hdr vh_copy;
+
+		pkt_type = RTE_PTYPE_L2_ETHER_QINQ;
+		vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+			&vh_copy);
+		if (unlikely(vh == NULL))
+			return pkt_type;
+		off += 2 * sizeof(*vh);
+		hdr_lens->l2_len += 2 * sizeof(*vh);
+		proto = vh->eth_proto;
+	}
+
+ l3:
+	if ((layers & RTE_PTYPE_L3_MASK) == 0)
+		return pkt_type;
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+		const struct ipv4_hdr *ip4h;
+		struct ipv4_hdr ip4h_copy;
+
+		ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+		if (unlikely(ip4h == NULL))
+			return pkt_type;
+
+		pkt_type |= ptype_l3_ip(ip4h->version_ihl);
+		hdr_lens->l3_len = ip4_hlen(ip4h);
+		off += hdr_lens->l3_len;
+
+		if ((layers & RTE_PTYPE_L4_MASK) == 0)
+			return pkt_type;
+
+		if (ip4h->fragment_offset & rte_cpu_to_be_16(
+				IPV4_HDR_OFFSET_MASK | IPV4_HDR_MF_FLAG)) {
+			pkt_type |= RTE_PTYPE_L4_FRAG;
+			hdr_lens->l4_len = 0;
+			return pkt_type;
+		}
+		proto = ip4h->next_proto_id;
+		pkt_type |= ptype_l4(proto);
+	} else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+		const struct ipv6_hdr *ip6h;
+		struct ipv6_hdr ip6h_copy;
+		int frag = 0;
+
+		ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+		if (unlikely(ip6h == NULL))
+			return pkt_type;
+
+		proto = ip6h->proto;
+		hdr_lens->l3_len = sizeof(*ip6h);
+		off += hdr_lens->l3_len;
+		pkt_type |= ptype_l3_ip6(proto);
+		if ((pkt_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT) {
+			proto = skip_ip6_ext(proto, m, &off, &frag);
+			hdr_lens->l3_len = off - hdr_lens->l2_len;
+		}
+		if (proto == 0)
+			return pkt_type;
+
+		if ((layers & RTE_PTYPE_L4_MASK) == 0)
+			return pkt_type;
+
+		if (frag) {
+			pkt_type |= RTE_PTYPE_L4_FRAG;
+			hdr_lens->l4_len = 0;
+			return pkt_type;
+		}
+		pkt_type |= ptype_l4(proto);
+	}
+
+	if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
+		hdr_lens->l4_len = sizeof(struct udp_hdr);
+		return pkt_type;
+	} else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
+		const struct tcp_hdr *th;
+		struct tcp_hdr th_copy;
+
+		th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+		if (unlikely(th == NULL))
+			return pkt_type & (RTE_PTYPE_L2_MASK |
+				RTE_PTYPE_L3_MASK);
+		hdr_lens->l4_len = (th->data_off & 0xf0) >> 2;
+		return pkt_type;
+	} else if ((pkt_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) {
+		hdr_lens->l4_len = sizeof(struct sctp_hdr);
+		return pkt_type;
+	} else {
+		uint32_t prev_off = off;
+
+		hdr_lens->l4_len = 0;
+
+		if ((layers & RTE_PTYPE_TUNNEL_MASK) == 0)
+			return pkt_type;
+
+		pkt_type |= ptype_tunnel(&proto, m, &off);
+		hdr_lens->tunnel_len = off - prev_off;
+	}
+
+	/* same job for inner header: we need to duplicate the code
+	 * because the packet types do not have the same value.
+	 */
+	if ((layers & RTE_PTYPE_INNER_L2_MASK) == 0)
+		return pkt_type;
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_TEB)) {
+		eh = rte_pktmbuf_read(m, off, sizeof(*eh), &eh_copy);
+		if (unlikely(eh == NULL))
+			return pkt_type;
+		pkt_type |= RTE_PTYPE_INNER_L2_ETHER;
+		proto = eh->ether_type;
+		off += sizeof(*eh);
+		hdr_lens->inner_l2_len = sizeof(*eh);
+	}
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) {
+		const struct vlan_hdr *vh;
+		struct vlan_hdr vh_copy;
+
+		pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+		pkt_type |= RTE_PTYPE_INNER_L2_ETHER_VLAN;
+		vh = rte_pktmbuf_read(m, off, sizeof(*vh), &vh_copy);
+		if (unlikely(vh == NULL))
+			return pkt_type;
+		off += sizeof(*vh);
+		hdr_lens->inner_l2_len += sizeof(*vh);
+		proto = vh->eth_proto;
+	} else if (proto == rte_cpu_to_be_16(ETHER_TYPE_QINQ)) {
+		const struct vlan_hdr *vh;
+		struct vlan_hdr vh_copy;
+
+		pkt_type &= ~RTE_PTYPE_INNER_L2_MASK;
+		pkt_type |= RTE_PTYPE_INNER_L2_ETHER_QINQ;
+		vh = rte_pktmbuf_read(m, off + sizeof(*vh), sizeof(*vh),
+			&vh_copy);
+		if (unlikely(vh == NULL))
+			return pkt_type;
+		off += 2 * sizeof(*vh);
+		hdr_lens->inner_l2_len += 2 * sizeof(*vh);
+		proto = vh->eth_proto;
+	}
+
+	if ((layers & RTE_PTYPE_INNER_L3_MASK) == 0)
+		return pkt_type;
+
+	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+		const struct ipv4_hdr *ip4h;
+		struct ipv4_hdr ip4h_copy;
+
+		ip4h = rte_pktmbuf_read(m, off, sizeof(*ip4h), &ip4h_copy);
+		if (unlikely(ip4h == NULL))
+			return pkt_type;
+
+		pkt_type |= ptype_inner_l3_ip(ip4h->version_ihl);
+		hdr_lens->inner_l3_len = ip4_hlen(ip4h);
+		off += hdr_lens->inner_l3_len;
+
+		if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+			return pkt_type;
+		if (ip4h->fragment_offset &
+				rte_cpu_to_be_16(IPV4_HDR_OFFSET_MASK |
+					IPV4_HDR_MF_FLAG)) {
+			pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+			hdr_lens->inner_l4_len = 0;
+			return pkt_type;
+		}
+		proto = ip4h->next_proto_id;
+		pkt_type |= ptype_inner_l4(proto);
+	} else if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+		const struct ipv6_hdr *ip6h;
+		struct ipv6_hdr ip6h_copy;
+		int frag = 0;
+
+		ip6h = rte_pktmbuf_read(m, off, sizeof(*ip6h), &ip6h_copy);
+		if (unlikely(ip6h == NULL))
+			return pkt_type;
+
+		proto = ip6h->proto;
+		hdr_lens->inner_l3_len = sizeof(*ip6h);
+		off += hdr_lens->inner_l3_len;
+		pkt_type |= ptype_inner_l3_ip6(proto);
+		if ((pkt_type & RTE_PTYPE_INNER_L3_MASK) ==
+				RTE_PTYPE_INNER_L3_IPV6_EXT) {
+			uint32_t prev_off;
+
+			prev_off = off;
+			proto = skip_ip6_ext(proto, m, &off, &frag);
+			hdr_lens->inner_l3_len += off - prev_off;
+		}
+		if (proto == 0)
+			return pkt_type;
+
+		if ((layers & RTE_PTYPE_INNER_L4_MASK) == 0)
+			return pkt_type;
+
+		if (frag) {
+			pkt_type |= RTE_PTYPE_INNER_L4_FRAG;
+			hdr_lens->inner_l4_len = 0;
+			return pkt_type;
+		}
+		pkt_type |= ptype_inner_l4(proto);
+	}
+
+	if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_UDP) {
+		hdr_lens->inner_l4_len = sizeof(struct udp_hdr);
+	} else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+			RTE_PTYPE_INNER_L4_TCP) {
+		const struct tcp_hdr *th;
+		struct tcp_hdr th_copy;
+
+		th = rte_pktmbuf_read(m, off, sizeof(*th), &th_copy);
+		if (unlikely(th == NULL))
+			return pkt_type & (RTE_PTYPE_INNER_L2_MASK |
+				RTE_PTYPE_INNER_L3_MASK);
+		hdr_lens->inner_l4_len = (th->data_off & 0xf0) >> 2;
+	} else if ((pkt_type & RTE_PTYPE_INNER_L4_MASK) ==
+			RTE_PTYPE_INNER_L4_SCTP) {
+		hdr_lens->inner_l4_len = sizeof(struct sctp_hdr);
+	} else {
+		hdr_lens->inner_l4_len = 0;
+	}
+
+	return pkt_type;
+}
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_net/rte_net.h
index 5f446ca0..d4156aea 100644
--- a/lib/librte_vhost/vhost_cuse/eventfd_copy.h
+++ b/lib/librte_net/rte_net.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright 2016 6WIND S.A.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -30,16 +30,65 @@
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-#ifndef _EVENTFD_H
-#define _EVENTFD_H
 
-int
-eventfd_init(void);
+#ifndef _RTE_NET_PTYPE_H_
+#define _RTE_NET_PTYPE_H_
 
-int
-eventfd_free(void);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Structure containing header lengths associated to a packet, filled
+ * by rte_net_get_ptype().
+ */
+struct rte_net_hdr_lens {
+	uint8_t l2_len;
+	uint8_t l3_len;
+	uint8_t l4_len;
+	uint8_t tunnel_len;
+	uint8_t inner_l2_len;
+	uint8_t inner_l3_len;
+	uint8_t inner_l4_len;
+};
 
-int
-eventfd_copy(int target_fd, int target_pid);
+/**
+ * Parse an Ethernet packet to get its packet type.
+ *
+ * This function parses the network headers in mbuf data and returns its
+ * packet type.
+ *
+ * If it is provided by the user, it also fills a rte_net_hdr_lens
+ * structure that contains the lengths of the parsed network
+ * headers. Each length field is valid only if the associated packet
+ * type is set. For instance, hdr_lens->l2_len is valid only if
+ * (retval & RTE_PTYPE_L2_MASK) != RTE_PTYPE_UNKNOWN.
+ *
+ * Supported packet types are:
+ *   L2: Ether, Vlan, QinQ
+ *   L3: IPv4, IPv6
+ *   L4: TCP, UDP, SCTP
+ *   Tunnels: IPv4, IPv6, Gre, Nvgre
+ *
+ * @param m
+ *   The packet mbuf to be parsed.
+ * @param hdr_lens
+ *   A pointer to a structure where the header lengths will be returned,
+ *   or NULL.
+ * @param layers
+ *   List of layers to parse. The function will stop at the first
+ *   empty layer. Examples:
+ *   - To parse all known layers, use RTE_PTYPE_ALL_MASK.
+ *   - To parse only L2 and L3, use RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK
+ * @return
+ *   The packet type of the packet.
+ */
+uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
+	struct rte_net_hdr_lens *hdr_lens, uint32_t layers);
 
+#ifdef __cplusplus
+}
 #endif
+
+
+#endif /* _RTE_NET_PTYPE_H_ */
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
new file mode 100644
index 00000000..3b15e651
--- /dev/null
+++ b/lib/librte_net/rte_net_version.map
@@ -0,0 +1,6 @@
+DPDK_16.11 {
+	global:
+	rte_net_get_ptype;
+
+	local: *;
+};
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index ea5ccd98..59686837 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -226,29 +226,6 @@ pdump_tx(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
 }
 
 static int
-pdump_get_dombdf(char *device_id, char *domBDF, size_t len)
-{
-	int ret;
-	struct rte_pci_addr dev_addr = {0};
-
-	/* identify if device_id is pci address or name */
-	ret = eal_parse_pci_DomBDF(device_id, &dev_addr);
-	if (ret < 0)
-		return -1;
-
-	if (dev_addr.domain)
-		ret = snprintf(domBDF, len, "%u:%u:%u.%u", dev_addr.domain,
-				dev_addr.bus, dev_addr.devid,
-				dev_addr.function);
-	else
-		ret = snprintf(domBDF, len, "%u:%u.%u", dev_addr.bus,
-				dev_addr.devid,
-				dev_addr.function);
-
-	return ret;
-}
-
-static int
 pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
 				struct rte_ring *ring, struct rte_mempool *mp,
 				uint16_t operation)
@@ -292,7 +269,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
 			if (ret < 0) {
 				RTE_LOG(ERR, PDUMP,
 					"failed to remove rx callback, errno=%d\n",
-					rte_errno);
+					-ret);
 				return ret;
 			}
 			cbs->cb = NULL;
@@ -347,7 +324,7 @@ pdump_regitser_tx_callbacks(uint16_t end_q, uint8_t port, uint16_t queue,
 			if (ret < 0) {
 				RTE_LOG(ERR, PDUMP,
 					"failed to remove tx callback, errno=%d\n",
-					rte_errno);
+					-ret);
 				return ret;
 			}
 			cbs->cb = NULL;
@@ -885,7 +862,6 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
 				void *filter)
 {
 	int ret = 0;
-	char domBDF[DEVICE_ID_SIZE];
 
 	ret = pdump_validate_ring_mp(ring, mp);
 	if (ret < 0)
@@ -894,11 +870,7 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
 	if (ret < 0)
 		return ret;
 
-	if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
-		ret = pdump_prepare_client_request(domBDF, queue, flags,
-						ENABLE, ring, mp, filter);
-	else
-		ret = pdump_prepare_client_request(device_id, queue, flags,
+	ret = pdump_prepare_client_request(device_id, queue, flags,
 						ENABLE, ring, mp, filter);
 
 	return ret;
@@ -928,17 +900,12 @@ rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
 				uint32_t flags)
 {
 	int ret = 0;
-	char domBDF[DEVICE_ID_SIZE];
 
 	ret = pdump_validate_flags(flags);
 	if (ret < 0)
 		return ret;
 
-	if (pdump_get_dombdf(device_id, domBDF, sizeof(domBDF)) > 0)
-		ret = pdump_prepare_client_request(domBDF, queue, flags,
-						DISABLE, NULL, NULL, NULL);
-	else
-		ret = pdump_prepare_client_request(device_id, queue, flags,
+	ret = pdump_prepare_client_request(device_id, queue, flags,
 						DISABLE, NULL, NULL, NULL);
 
 	return ret;
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index b5f4e2f3..924b8043 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -41,6 +41,10 @@
  * packet dump library to provide packet capturing support on dpdk.
  */
 
+#include <stdint.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h
index 84d18025..f3663483 100644
--- a/lib/librte_pipeline/rte_pipeline.h
+++ b/lib/librte_pipeline/rte_pipeline.h
@@ -87,6 +87,7 @@ extern "C" {
 
 #include <rte_port.h>
 #include <rte_table.h>
+#include <rte_common.h>
 
 struct rte_mbuf;
 
@@ -244,6 +245,7 @@ struct rte_pipeline_table_entry {
 	/** Reserved action */
 	enum rte_pipeline_action action;
 
+	RTE_STD_C11
 	union {
 		/** Output port ID (meta-data for "Send packet to output port"
 		action) */
@@ -252,7 +254,7 @@ struct rte_pipeline_table_entry {
 		uint32_t table_id;
 	};
 	/** Start of table entry area for user defined actions and meta-data */
-	uint8_t action_data[0];
+	__extension__ uint8_t action_data[0];
 };
 
 /**
diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile
index 3d84a0e4..44fa7352 100644
--- a/lib/librte_port/Makefile
+++ b/lib/librte_port/Makefile
@@ -56,6 +56,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c
 endif
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_fd.c
 ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c
 endif
@@ -70,6 +71,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h
 endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_fd.h
 ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h
 endif
diff --git a/lib/librte_port/rte_port_fd.c b/lib/librte_port/rte_port_fd.c
new file mode 100644
index 00000000..0d640f34
--- /dev/null
+++ b/lib/librte_port/rte_port_fd.c
@@ -0,0 +1,552 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "rte_port_fd.h"
+
+/*
+ * Port FD Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \
+	do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \
+	do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_reader {
+	struct rte_port_in_stats stats;
+	int fd;
+	uint32_t mtu;
+	struct rte_mempool *mempool;
+};
+
+static void *
+rte_port_fd_reader_create(void *params, int socket_id)
+{
+	struct rte_port_fd_reader_params *conf =
+			(struct rte_port_fd_reader_params *) params;
+	struct rte_port_fd_reader *port;
+
+	/* Check input parameters */
+	if (conf == NULL) {
+		RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+		return NULL;
+	}
+	if (conf->fd < 0) {
+		RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__);
+		return NULL;
+	}
+	if (conf->mtu == 0) {
+		RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__);
+		return NULL;
+	}
+	if (conf->mempool == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->fd = conf->fd;
+	port->mtu = conf->mtu;
+	port->mempool = conf->mempool;
+
+	return port;
+}
+
+static int
+rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_fd_reader *p = (struct rte_port_fd_reader *) port;
+	uint32_t i, j;
+
+	if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0)
+		return 0;
+
+	for (i = 0; i < n_pkts; i++) {
+		rte_mbuf_refcnt_set(pkts[i], 1);
+		rte_pktmbuf_reset(pkts[i]);
+	}
+
+	/* one read() per mbuf; stop at the first read() returning <= 0 */
+	for (i = 0; i < n_pkts; i++) {
+		struct rte_mbuf *pkt = pkts[i];
+		void *pkt_data = rte_pktmbuf_mtod(pkt, void *);
+		ssize_t n_bytes;
+
+		n_bytes = read(p->fd, pkt_data, (size_t) p->mtu);
+		if (n_bytes <= 0)
+			break;
+		pkt->data_len = n_bytes;
+		pkt->pkt_len = n_bytes;
+	}
+
+	/* free the unused mbufs; i keeps the number of packets really read */
+	for (j = i; j < n_pkts; j++)
+		rte_pktmbuf_free(pkts[j]);
+	RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i);
+
+	return i;
+}
+
+static int
+rte_port_fd_reader_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_fd_reader_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_fd_reader *p =
+			(struct rte_port_fd_reader *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port FD Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \
+	do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+	do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_writer {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint16_t tx_buf_count;
+	uint32_t fd;
+};
+
+static void *
+rte_port_fd_writer_create(void *params, int socket_id)
+{
+	struct rte_port_fd_writer_params *conf =
+		(struct rte_port_fd_writer_params *) params;
+	struct rte_port_fd_writer *port;
+
+	/* Check input parameters, including the file descriptor */
+	if ((conf == NULL) || (conf->fd < 0) ||
+		(conf->tx_burst_sz == 0) ||
+		(conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+		(!rte_is_power_of_2(conf->tx_burst_sz))) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->fd = conf->fd;
+	port->tx_burst_sz = conf->tx_burst_sz;
+	port->tx_buf_count = 0;
+
+	return port;
+}
+
+static inline void
+send_burst(struct rte_port_fd_writer *p)
+{
+	uint32_t i;
+
+	for (i = 0; i < p->tx_buf_count; i++) {
+		struct rte_mbuf *pkt = p->tx_buf[i];
+		void *pkt_data = rte_pktmbuf_mtod(pkt, void*);
+		size_t n_bytes = rte_pktmbuf_data_len(pkt);
+		ssize_t ret;
+
+		ret = write(p->fd, pkt_data, n_bytes);
+		if (ret < 0)
+			break;
+	}
+
+	RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i);
+
+	for (i = 0; i < p->tx_buf_count; i++)
+		rte_pktmbuf_free(p->tx_buf[i]);
+
+	p->tx_buf_count = 0;
+}
+
+static int
+rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_fd_writer *p =
+		(struct rte_port_fd_writer *) port;
+
+	p->tx_buf[p->tx_buf_count++] = pkt;
+	RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_fd_writer_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_fd_writer *p =
+		(struct rte_port_fd_writer *) port;
+	uint32_t tx_buf_count = p->tx_buf_count;
+
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++)
+			p->tx_buf[tx_buf_count++] = pkts[i];
+		RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+	} else
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			p->tx_buf[tx_buf_count++] = pkt;
+			RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~pkt_mask;
+		}
+
+	p->tx_buf_count = tx_buf_count;
+	if (tx_buf_count >= p->tx_burst_sz)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_fd_writer_flush(void *port)
+{
+	struct rte_port_fd_writer *p =
+		(struct rte_port_fd_writer *) port;
+
+	if (p->tx_buf_count > 0)
+		send_burst(p);
+
+	return 0;
+}
+
+static int
+rte_port_fd_writer_free(void *port)
+{
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	rte_port_fd_writer_flush(port);
+	rte_free(port);
+
+	return 0;
+}
+
+static int rte_port_fd_writer_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_fd_writer *p =
+		(struct rte_port_fd_writer *) port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port FD Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+	do { port->stats.n_pkts_in += val; } while (0)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+	do { port->stats.n_pkts_drop += val; } while (0)
+
+#else
+
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_fd_writer_nodrop {
+	struct rte_port_out_stats stats;
+
+	struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+	uint32_t tx_burst_sz;
+	uint16_t tx_buf_count;
+	uint64_t n_retries;
+	uint32_t fd;
+};
+
+/* Create a nodrop fd writer port; returns NULL on bad params or no memory. */
+static void *
+rte_port_fd_writer_nodrop_create(void *params, int socket_id)
+{
+	struct rte_port_fd_writer_nodrop_params *cfg = params;
+	struct rte_port_fd_writer_nodrop *p;
+	int cfg_ok;
+
+	/* Burst size: non-zero power of 2 up to RTE_PORT_IN_BURST_SIZE_MAX */
+	cfg_ok = (cfg != NULL) &&
+		(cfg->fd >= 0) &&
+		(cfg->tx_burst_sz != 0) &&
+		(cfg->tx_burst_sz <= RTE_PORT_IN_BURST_SIZE_MAX) &&
+		rte_is_power_of_2(cfg->tx_burst_sz);
+	if (!cfg_ok) {
+		RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+		return NULL;
+	}
+
+	/* Zero-filled, cache-line aligned allocation on the given socket */
+	p = rte_zmalloc_socket("PORT", sizeof(*p),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (p == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	p->fd = cfg->fd;
+	p->tx_burst_sz = cfg->tx_burst_sz;
+	p->tx_buf_count = 0;
+
+	/*
+	 * n_retries == 0 asks for unlimited retries; it is stored as
+	 * UINT64_MAX so the send loop needs no special case for it.
+	 */
+	p->n_retries = (cfg->n_retries != 0) ? cfg->n_retries : UINT64_MAX;
+
+	return p;
+}
+
+/* Write each buffered packet to p->fd and free it. A write returning 0 or
+ * failing with EAGAIN/EINTR is retried within the burst-wide p->n_retries
+ * budget; any other error, or an exhausted budget, drops the packet.
+ */
+static inline void
+send_burst_nodrop(struct rte_port_fd_writer_nodrop *p)
+{
+	uint64_t n_retries = 0;
+	uint32_t i;
+
+	for (i = 0; i < p->tx_buf_count; i++) {
+		struct rte_mbuf *pkt = p->tx_buf[i];
+		ssize_t ret = 0;
+
+		for ( ; n_retries < p->n_retries; n_retries++) {
+			ret = write(p->fd, rte_pktmbuf_mtod(pkt, void *),
+				rte_pktmbuf_data_len(pkt));
+			if (ret > 0 || (ret < 0 && errno != EAGAIN &&
+					errno != EINTR))
+				break;
+		}
+		if (ret <= 0) { /* not written: count it as dropped */
+			RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, 1);
+		}
+		rte_pktmbuf_free(pkt);
+	}
+	p->tx_buf_count = 0;
+}
+
+/* Buffer one packet; send the buffered burst once tx_burst_sz is reached. */
+static int
+rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+	struct rte_port_fd_writer_nodrop *p = port;
+
+	p->tx_buf[p->tx_buf_count] = pkt;
+	p->tx_buf_count++;
+	RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+	if (p->tx_buf_count >= p->tx_burst_sz)
+		send_burst_nodrop(p);
+	return 0;
+}
+
+/* Buffer the packets selected by pkts_mask; send once tx_burst_sz is hit. */
+static int
+rte_port_fd_writer_nodrop_tx_bulk(void *port,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_fd_writer_nodrop *p = port;
+	uint32_t count = p->tx_buf_count;
+
+	if (pkts_mask & (pkts_mask + 1)) {
+		/* Sparse mask: pick packets bit by bit, lowest bit first */
+		while (pkts_mask != 0) {
+			uint32_t idx = __builtin_ctzll(pkts_mask);
+
+			p->tx_buf[count++] = pkts[idx];
+			RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+			pkts_mask &= ~(1LLU << idx);
+		}
+	} else {
+		/* Contiguous mask (bits 0..n-1): copy the first n packets */
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		for (i = 0; i < n_pkts; i++)
+			p->tx_buf[count++] = pkts[i];
+		RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+	}
+
+	p->tx_buf_count = count;
+	if (count >= p->tx_burst_sz)
+		send_burst_nodrop(p);
+	return 0;
+}
+
+/* Send out whatever is still buffered; a no-op when the buffer is empty. */
+static int
+rte_port_fd_writer_nodrop_flush(void *port)
+{
+	struct rte_port_fd_writer_nodrop *p = port;
+
+	if (p->tx_buf_count != 0)
+		send_burst_nodrop(p);
+
+	return 0;
+}
+
+/* Flush pending packets, then release the port; -EINVAL if port is NULL. */
+static int
+rte_port_fd_writer_nodrop_free(void *port)
+{
+	if (port != NULL) {
+		rte_port_fd_writer_nodrop_flush(port);
+		rte_free(port);
+		return 0;
+	}
+
+	RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+	return -EINVAL;
+}
+
+/* Copy the counters into *stats when given, then reset them if clear != 0. */
+static int rte_port_fd_writer_nodrop_stats_read(void *port,
+		struct rte_port_out_stats *stats, int clear)
+{
+	struct rte_port_fd_writer_nodrop *nodrop = port;
+
+	if (stats != NULL)
+		memcpy(stats, &nodrop->stats, sizeof(nodrop->stats));
+
+	if (clear != 0)
+		memset(&nodrop->stats, 0, sizeof(nodrop->stats));
+
+	return 0;
+}
+
+/*
+ * Summary of port operations (the tables declared extern in rte_port_fd.h)
+ */
+struct rte_port_in_ops rte_port_fd_reader_ops = { /* input port */
+	.f_create = rte_port_fd_reader_create,
+	.f_free = rte_port_fd_reader_free,
+	.f_rx = rte_port_fd_reader_rx,
+	.f_stats = rte_port_fd_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_fd_writer_ops = { /* output port */
+	.f_create = rte_port_fd_writer_create,
+	.f_free = rte_port_fd_writer_free,
+	.f_tx = rte_port_fd_writer_tx,
+	.f_tx_bulk = rte_port_fd_writer_tx_bulk,
+	.f_flush = rte_port_fd_writer_flush,
+	.f_stats = rte_port_fd_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = { /* output port */
+	.f_create = rte_port_fd_writer_nodrop_create,
+	.f_free = rte_port_fd_writer_nodrop_free,
+	.f_tx = rte_port_fd_writer_nodrop_tx,
+	.f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk,
+	.f_flush = rte_port_fd_writer_nodrop_flush,
+	.f_stats = rte_port_fd_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_port/rte_port_fd.h
index 154b32a4..77a2d31b 100644
--- a/lib/librte_vhost/vhost_cuse/eventfd_copy.c
+++ b/lib/librte_port/rte_port_fd.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -31,74 +31,75 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <unistd.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
+#ifndef __INCLUDE_RTE_PORT_FD_H__
+#define __INCLUDE_RTE_PORT_FD_H__
 
-#include <rte_log.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-#include "eventfd_link/eventfd_link.h"
-#include "eventfd_copy.h"
-#include "vhost-net.h"
+/**
+ * @file
+ * RTE Port FD Device
+ *
+ * fd_reader: input port built on top of valid non-blocking file descriptor
+ * fd_writer: output port built on top of valid non-blocking file descriptor
+ *
+ ***/
 
-static const char eventfd_cdev[] = "/dev/eventfd-link";
+#include <stdint.h>
 
-static int eventfd_link = -1;
+#include <rte_mempool.h>
+#include "rte_port.h"
 
-int
-eventfd_init(void)
-{
-	if (eventfd_link >= 0)
-		return 0;
+/** fd_reader port parameters */
+struct rte_port_fd_reader_params {
+	/** File descriptor */
+	int fd;
 
-	eventfd_link = open(eventfd_cdev, O_RDWR);
-	if (eventfd_link < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"eventfd_link module is not loaded\n");
-		return -1;
-	}
+	/** Maximum Transfer Unit (MTU) */
+	uint32_t mtu;
 
-	return 0;
-}
+	/** Pre-initialized buffer pool */
+	struct rte_mempool *mempool;
+};
 
-int
-eventfd_free(void)
-{
-	if (eventfd_link >= 0)
-		close(eventfd_link);
-	return 0;
-}
+/** fd_reader port operations */
+extern struct rte_port_in_ops rte_port_fd_reader_ops;
 
-/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-int
-eventfd_copy(int target_fd, int target_pid)
-{
-	int ret;
-	struct eventfd_copy2 eventfd_copy2;
-
-
-	/* Open the character device to the kernel module. */
-	/* TODO: check this earlier rather than fail until VM boots! */
-	if (eventfd_init() < 0)
-		return -1;
-
-	eventfd_copy2.fd = target_fd;
-	eventfd_copy2.pid = target_pid;
-	eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC;
-	/* Call the IOCTL to copy the eventfd. */
-	ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2);
-
-	if (ret < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"EVENTFD_COPY2 ioctl failed\n");
-		return -1;
-	}
-
-	return ret;
+/** fd_writer port parameters */
+struct rte_port_fd_writer_params {
+	/** File descriptor */
+	int fd;
+
+	/** Recommended write burst size. The actual burst size can be
+	 * bigger or smaller than this value.
+	 */
+	uint32_t tx_burst_sz;
+};
+
+/** fd_writer port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_ops;
+
+/** fd_writer_nodrop port parameters */
+struct rte_port_fd_writer_nodrop_params {
+	/** File descriptor */
+	int fd;
+
+	/** Recommended write burst size. The actual burst size can be
+	 * bigger or smaller than this value.
+	 */
+	uint32_t tx_burst_sz;
+
+	/** Maximum number of retries, 0 for no limit */
+	uint32_t n_retries;
+};
+
+/** fd_writer_nodrop port operations */
+extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops;
+
+#ifdef __cplusplus
 }
+#endif
+
+#endif
diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h
index 4db8a8a8..be585a77 100644
--- a/lib/librte_port/rte_port_source_sink.h
+++ b/lib/librte_port/rte_port_source_sink.h
@@ -55,7 +55,7 @@ struct rte_port_source_params {
 	struct rte_mempool *mempool;
 
 	/** The full path of the pcap file to read packets from */
-	char *file_name;
+	const char *file_name;
 	/** The number of bytes to be read from each packet in the
 	 *  pcap file. If this value is 0, the whole packet is read;
 	 *  if it is bigger than packet size, the generated packets
@@ -69,7 +69,7 @@ extern struct rte_port_in_ops rte_port_source_ops;
 /** sink port parameters */
 struct rte_port_sink_params {
 	/** The full path of the pcap file to write the packets to */
-	char *file_name;
+	const char *file_name;
 	/** The maximum number of packets write to the pcap file.
 	 *  If this value is 0, the "infinite" write will be carried
 	 *  out.
diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map
index 048c20d7..6470629b 100644
--- a/lib/librte_port/rte_port_version.map
+++ b/lib/librte_port/rte_port_version.map
@@ -42,3 +42,12 @@ DPDK_16.07 {
 	rte_port_kni_writer_nodrop_ops;
 
 } DPDK_2.2;
+
+DPDK_16.11 {
+	global:
+
+	rte_port_fd_reader_ops;
+	rte_port_fd_writer_ops;
+	rte_port_fd_writer_nodrop_ops;
+
+} DPDK_16.07;
diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h
index c7a2934c..737e0554 100644
--- a/lib/librte_reorder/rte_reorder.h
+++ b/lib/librte_reorder/rte_reorder.h
@@ -44,6 +44,8 @@
  *
  */
 
+#include <rte_mbuf.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 0e22e694..32b8c8d2 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -187,7 +187,7 @@ struct rte_ring {
 	struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
 #endif
 
-	void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
+	void *ring[] __rte_cache_aligned;   /**< Memory space of ring starts here.
 	                                     * not volatile so need to be careful
 	                                     * about compiler re-ordering */
 };
@@ -341,7 +341,7 @@ void rte_ring_free(struct rte_ring *r);
 int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
 
 /**
- * Dump the status of the ring to the console.
+ * Dump the status of the ring to a file.
  *
  * @param f
  *   A pointer to a file for output
diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h
index ff675c58..010d752c 100644
--- a/lib/librte_sched/rte_bitmap.h
+++ b/lib/librte_sched/rte_bitmap.h
@@ -64,6 +64,7 @@ extern "C" {
  *
  ***/
 
+#include <string.h>
 #include <rte_common.h>
 #include <rte_debug.h>
 #include <rte_memory.h>
@@ -103,7 +104,7 @@ struct rte_bitmap {
 	uint32_t go2;     /**< Bitmap scan: Go/stop condition for current array2 cache line */
 
 	/* Storage space for array1 and array2 */
-	uint8_t memory[0];
+	uint8_t memory[];
 };
 
 static inline void
diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_sched/rte_reciprocal.h
index abd15251..5e21f096 100644
--- a/lib/librte_sched/rte_reciprocal.h
+++ b/lib/librte_sched/rte_reciprocal.h
@@ -22,6 +22,8 @@
 #ifndef _RTE_RECIPROCAL_H_
 #define _RTE_RECIPROCAL_H_
 
+#include <stdint.h>
+
 struct rte_reciprocal {
 	uint32_t m;
 	uint8_t sh1, sh2;
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
index 8920adec..aed144ba 100644
--- a/lib/librte_sched/rte_sched_common.h
+++ b/lib/librte_sched/rte_sched_common.h
@@ -38,6 +38,7 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
 #include <sys/types.h>
 
 #define __rte_aligned_16 __attribute__((__aligned__(16)))
diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile
index 7a8a3f3c..c82c7696 100644
--- a/lib/librte_table/Makefile
+++ b/lib/librte_table/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm_ipv6.c
 ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
 SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_acl.c
 endif
+SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_cuckoo.c
 SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key8.c
 SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key16.c
 SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key32.c
@@ -80,5 +81,6 @@ DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm
 ifeq ($(CONFIG_RTE_LIBRTE_ACL),y)
 DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl
 endif
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h
index 9d17516a..57505a6f 100644
--- a/lib/librte_table/rte_table_hash.h
+++ b/lib/librte_table/rte_table_hash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -363,6 +363,35 @@ struct rte_table_hash_key32_ext_params {
 /** Extendible bucket hash table operations */
 extern struct rte_table_ops rte_table_hash_key32_ext_ops;
 
+/** Cuckoo hash table parameters */
+struct rte_table_hash_cuckoo_params {
+	/** Key size (number of bytes) */
+		uint32_t key_size;
+
+	/** Maximum number of hash table entries */
+	uint32_t n_keys;
+
+	/** Hash function used to calculate hash */
+	rte_table_hash_op_hash f_hash;
+
+	/** Seed value or Init value used by f_hash */
+	uint32_t seed;
+
+	/** Byte offset within packet meta-data where the 4-byte key signature
+	 * is located. Valid for pre-computed key signature tables, ignored for
+	 * do-sig tables. */
+	uint32_t signature_offset;
+
+	/** Byte offset within packet meta-data where the key is located */
+	uint32_t key_offset;
+
+	/** Hash table name */
+	const char *name;
+};
+
+/** Cuckoo hash table operations */
+extern struct rte_table_ops rte_table_hash_cuckoo_dosig_ops;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_table/rte_table_hash_cuckoo.c b/lib/librte_table/rte_table_hash_cuckoo.c
new file mode 100644
index 00000000..ff7baee3
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_cuckoo.c
@@ -0,0 +1,382 @@
+/*-
+ *	 BSD LICENSE
+ *
+ *	 Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *	 All rights reserved.
+ *
+ *	 Redistribution and use in source and binary forms, with or without
+ *	 modification, are permitted provided that the following conditions
+ *	 are met:
+ *
+ *	* Redistributions of source code must retain the above copyright
+ *		 notice, this list of conditions and the following disclaimer.
+ *	* Redistributions in binary form must reproduce the above copyright
+ *		 notice, this list of conditions and the following disclaimer in
+ *		 the documentation and/or other materials provided with the
+ *		 distribution.
+ *	* Neither the name of Intel Corporation nor the names of its
+ *		 contributors may be used to endorse or promote products derived
+ *		 from this software without specific prior written permission.
+ *
+ *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include <rte_hash.h>
+#include "rte_table_hash.h"
+
+#ifdef RTE_TABLE_STATS_COLLECT
+/* Statistics enabled: add val to n_pkts_in / n_pkts_lookup_miss */
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val) \
+	(table->stats.n_pkts_in += val)
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val) \
+	(table->stats.n_pkts_lookup_miss += val)
+
+#else
+/* Statistics disabled: both macros expand to nothing */
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(table, val)
+#define RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(table, val)
+
+#endif
+
+
+struct rte_table_hash {
+	struct rte_table_stats stats;
+
+	/* Input parameters */
+	uint32_t key_size;
+	uint32_t entry_size;
+	uint32_t n_keys;
+	rte_table_hash_op_hash f_hash;
+	uint32_t seed;
+	uint32_t signature_offset;
+	uint32_t key_offset;
+	const char *name;
+
+	/* cuckoo hash table object: maps a key to its entry position */
+	struct rte_hash *h_table;
+	/* Entry storage; the entry at position pos is at pos * entry_size */
+	uint8_t memory[] __rte_cache_aligned;
+};
+
+/* Validate creation parameters: 0 when usable, -EINVAL otherwise. */
+static int
+check_params_create_hash_cuckoo(
+	const struct rte_table_hash_cuckoo_params *params)
+{
+	if (!params) {
+		RTE_LOG(ERR, TABLE, "NULL Input Parameters.\n");
+		return -EINVAL;
+	}
+	/* The key must be at least one byte long */
+	if (!params->key_size) {
+		RTE_LOG(ERR, TABLE, "Invalid key_size.\n");
+		return -EINVAL;
+	}
+	/* The table must have room for at least one key */
+	if (!params->n_keys) {
+		RTE_LOG(ERR, TABLE, "Invalid n_keys.\n");
+		return -EINVAL;
+	}
+	/* A hash function is mandatory */
+	if (!params->f_hash) {
+		RTE_LOG(ERR, TABLE, "f_hash is NULL.\n");
+		return -EINVAL;
+	}
+	/* So is a table name */
+	if (!params->name) {
+		RTE_LOG(ERR, TABLE, "Table name is NULL.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Create the table (header + n_keys entries) and its rte_hash key index. */
+static void *
+rte_table_hash_cuckoo_create(void *params, int socket_id, uint32_t entry_size)
+{
+	struct rte_table_hash_cuckoo_params *p = params;
+	struct rte_hash *rte_hash_handle;
+	struct rte_table_hash *t;
+	uint64_t total_size, total_cl_size;
+
+	if (check_params_create_hash_cuckoo(params))
+		return NULL;
+
+	/* Size header + entries in whole cache lines; 64-bit math so that
+	 * n_keys * entry_size cannot wrap. Entry offsets are computed in
+	 * 32 bits elsewhere in this file, so larger tables are refused. */
+	total_cl_size =
+		(sizeof(struct rte_table_hash) +
+		 RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	total_cl_size += ((uint64_t)p->n_keys * entry_size +
+			RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE;
+	total_size = total_cl_size * RTE_CACHE_LINE_SIZE;
+	if (total_size > UINT32_MAX) {
+		RTE_LOG(ERR, TABLE, "%s: table size exceeds 4 GB\n", __func__);
+		return NULL;
+	}
+
+	t = rte_zmalloc_socket("TABLE", total_size,
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (t == NULL) {
+		RTE_LOG(ERR, TABLE,
+			"%s: Cannot allocate %u bytes for Cuckoo hash table\n",
+			__func__, (uint32_t)total_size);
+		return NULL;
+	}
+
+	/* Create cuckoo hash table */
+	struct rte_hash_parameters hash_cuckoo_params = {
+		.entries = p->n_keys,
+		.key_len = p->key_size,
+		.hash_func = (rte_hash_function)(p->f_hash),
+		.hash_func_init_val = p->seed,
+		.socket_id = socket_id,
+		.name = p->name
+	};
+
+	/* An rte_hash already registered under this name is reused */
+	rte_hash_handle = rte_hash_find_existing(p->name);
+	if (rte_hash_handle == NULL) {
+		rte_hash_handle = rte_hash_create(&hash_cuckoo_params);
+		if (NULL == rte_hash_handle) {
+			RTE_LOG(ERR, TABLE,
+				"%s: failed to create cuckoo hash table. keysize: %u\n",
+				__func__, hash_cuckoo_params.key_len);
+			rte_free(t);
+			return NULL;
+		}
+	}
+
+	/* initialize the cuckoo hash parameters */
+	t->key_size = p->key_size;
+	t->entry_size = entry_size;
+	t->n_keys = p->n_keys;
+	t->f_hash = p->f_hash;
+	t->seed = p->seed;
+	t->signature_offset = p->signature_offset;
+	t->key_offset = p->key_offset;
+	t->name = p->name;
+	t->h_table = rte_hash_handle;
+
+	RTE_LOG(INFO, TABLE,
+		"%s: Cuckoo Hash table memory footprint is %u bytes\n",
+		__func__, (uint32_t)total_size);
+	return t;
+}
+
+/* Free the rte_hash and the table memory; -EINVAL if table is NULL. */
+static int
+rte_table_hash_cuckoo_free(void *table)
+{
+	struct rte_table_hash *tbl = table;
+
+	if (tbl == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	rte_hash_free(tbl->h_table);
+	rte_free(tbl);
+	return 0;
+}
+
+/*
+ * Add or overwrite the entry stored for key. *key_found is set to 1 when the
+ * key already existed and to 0 when it was inserted; *entry_ptr points at the
+ * stored copy. Returns 0 on success or a negative value on failure.
+ */
+static int
+rte_table_hash_cuckoo_entry_add(void *table, void *key, void *entry,
+		int *key_found, void **entry_ptr)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *)table;
+	uint8_t *slot;
+	int pos;
+
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	if (entry == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/*  Find Existing entries */
+	pos = rte_hash_lookup(t->h_table, key);
+	if (pos >= 0) {
+		*key_found = 1;
+	} else if (pos == -ENOENT) {
+		/* Entry not found. Adding new entry */
+		pos = rte_hash_add_key(t->h_table, key);
+		if (pos < 0) {
+			RTE_LOG(ERR, TABLE,
+				"%s: Entry not added, status : %d\n",
+				__func__, pos);
+			return pos;
+		}
+		*key_found = 0;
+	} else {
+		/* Lookup failed for a reason other than a missing key */
+		return pos;
+	}
+
+	/* The rte_hash position doubles as the entry index in memory[] */
+	slot = &t->memory[pos * t->entry_size];
+	memcpy(slot, entry, t->entry_size);
+	*entry_ptr = slot;
+
+	return 0;
+}
+
+/*
+ * Remove key from the table. On success returns 0, sets *key_found to 1,
+ * copies the removed entry to entry when it is non-NULL and zeroes the slot.
+ * If rte_hash cannot delete the key, *key_found is 0 and its negative status
+ * is returned; a NULL table or key gives -EINVAL.
+ */
+static int
+rte_table_hash_cuckoo_entry_delete(void *table, void *key,
+		int *key_found, void *entry)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *)table;
+	int pos;
+
+	if (table == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+	if (key == NULL) {
+		RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	pos = rte_hash_del_key(t->h_table, key);
+	*key_found = (pos >= 0);
+	if (pos < 0)
+		return pos;
+	if (entry)
+		memcpy(entry, &t->memory[pos * t->entry_size], t->entry_size);
+	memset(&t->memory[pos * t->entry_size], 0, t->entry_size);
+	return 0;
+}
+
+
+static int
+rte_table_hash_cuckoo_lookup_dosig(void *table,
+	struct rte_mbuf **pkts,
+	uint64_t pkts_mask,
+	uint64_t *lookup_hit_mask,
+	void **entries)
+{
+	struct rte_table_hash *t = (struct rte_table_hash *)table;
+	uint64_t pkts_mask_out = 0;
+	uint32_t i;
+	/* Looks up the key of every packet selected in pkts_mask */
+	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
+
+	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_IN_ADD(t, n_pkts_in);
+	/* Contiguous mask (bits 0..n-1 set, or empty): one bulk lookup */
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		const uint8_t *keys[64];
+		int32_t positions[64], status;
+
+		/* Keys for bulk lookup */
+		for (i = 0; i < n_pkts_in; i++)
+			keys[i] = RTE_MBUF_METADATA_UINT8_PTR(pkts[i],
+					t->key_offset);
+
+		/* Bulk Lookup */
+		status = rte_hash_lookup_bulk(t->h_table,
+				(const void **) keys,
+				n_pkts_in,
+				positions);
+		/* A non-zero status leaves the hit mask empty (all misses) */
+		if (status == 0) {
+			for (i = 0; i < n_pkts_in; i++) {
+				if (likely(positions[i] >= 0)) {
+					uint64_t pkt_mask = 1LLU << i;
+
+					entries[i] = &t->memory[positions[i]
+						* t->entry_size];
+					pkts_mask_out |= pkt_mask;
+				}
+			}
+		}
+	} else { /* sparse mask: look up each selected packet on its own */
+		for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX
+					- __builtin_clzll(pkts_mask)); i++) {
+			uint64_t pkt_mask = 1LLU << i;
+
+			if (pkt_mask & pkts_mask) {
+				struct rte_mbuf *pkt = pkts[i];
+				uint8_t *key = RTE_MBUF_METADATA_UINT8_PTR(pkt,
+						t->key_offset);
+				int pos;
+
+				pos = rte_hash_lookup(t->h_table, key);
+				if (likely(pos >= 0)) {
+					entries[i] = &t->memory[pos
+						* t->entry_size];
+					pkts_mask_out |= pkt_mask;
+				}
+			}
+		}
+	}
+	/* entries[i] is written only for the bits set in the hit mask */
+	*lookup_hit_mask = pkts_mask_out;
+	RTE_TABLE_HASH_CUCKOO_STATS_PKTS_LOOKUP_MISS(t,
+			n_pkts_in - __builtin_popcountll(pkts_mask_out));
+
+	return 0;
+
+}
+
+/* Copy the counters into *stats when given, then reset them if clear != 0. */
+static int
+rte_table_hash_cuckoo_stats_read(void *table, struct rte_table_stats *stats,
+	int clear)
+{
+	struct rte_table_hash *tbl = table;
+
+	if (stats != NULL)
+		memcpy(stats, &tbl->stats, sizeof(tbl->stats));
+	if (clear != 0)
+		memset(&tbl->stats, 0, sizeof(tbl->stats));
+
+	return 0;
+}
+
+struct rte_table_ops rte_table_hash_cuckoo_dosig_ops = {
+	.f_create = rte_table_hash_cuckoo_create,
+	.f_free = rte_table_hash_cuckoo_free,
+	.f_add = rte_table_hash_cuckoo_entry_add,
+	.f_delete = rte_table_hash_cuckoo_entry_delete,
+	.f_add_bulk = NULL, /* no bulk add operation is provided */
+	.f_delete_bulk = NULL, /* no bulk delete operation is provided */
+	.f_lookup = rte_table_hash_cuckoo_lookup_dosig,
+	.f_stats = rte_table_hash_cuckoo_stats_read,
+};
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
index b7e000fd..08d4d77e 100644
--- a/lib/librte_table/rte_table_hash_key16.c
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -130,7 +130,7 @@ rte_table_hash_create_key16_lru(void *params,
 	/* Check input parameters */
 	if ((check_params_create_lru(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+		((sizeof(struct rte_bucket_4_16) % 64) != 0))
 		return NULL;
 	n_entries_per_bucket = 4;
 	key_size = 16;
@@ -344,7 +344,7 @@ rte_table_hash_create_key16_ext(void *params,
 	/* Check input parameters */
 	if ((check_params_create_ext(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0))
+		((sizeof(struct rte_bucket_4_16) % 64) != 0))
 		return NULL;
 
 	n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c
index a7aba492..161f6b7a 100644
--- a/lib/librte_table/rte_table_hash_key32.c
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -129,7 +129,7 @@ rte_table_hash_create_key32_lru(void *params,
 	/* Check input parameters */
 	if ((check_params_create_lru(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) {
+		((sizeof(struct rte_bucket_4_32) % 64) != 0)) {
 		return NULL;
 	}
 	n_entries_per_bucket = 4;
@@ -337,7 +337,7 @@ rte_table_hash_create_key32_ext(void *params,
 	/* Check input parameters */
 	if ((check_params_create_ext(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0))
+		((sizeof(struct rte_bucket_4_32) % 64) != 0))
 		return NULL;
 
 	n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
index e2e2bdc4..b04f60dc 100644
--- a/lib/librte_table/rte_table_hash_key8.c
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -125,7 +125,7 @@ rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
 	/* Check input parameters */
 	if ((check_params_create_lru(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) {
+		((sizeof(struct rte_bucket_4_8) % 64) != 0)) {
 		return NULL;
 	}
 	n_entries_per_bucket = 4;
@@ -332,7 +332,7 @@ rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
 	/* Check input parameters */
 	if ((check_params_create_ext(p) != 0) ||
 		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
-		((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0))
+		((sizeof(struct rte_bucket_4_8) % 64) != 0))
 		return NULL;
 
 	n_entries_per_bucket = 4;
diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map
index 459c2da3..e1eaa275 100644
--- a/lib/librte_table/rte_table_version.map
+++ b/lib/librte_table/rte_table_version.map
@@ -28,4 +28,11 @@ DPDK_2.2 {
 	rte_table_hash_key16_ext_dosig_ops;
 	rte_table_hash_key16_lru_dosig_ops;
 
+};
+
+DPDK_16.07 {
+       global:
+
+       rte_table_hash_cuckoo_dosig_ops;
+
 } DPDK_2.0;
diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h
index 77547c6b..a276a736 100644
--- a/lib/librte_timer/rte_timer.h
+++ b/lib/librte_timer/rte_timer.h
@@ -66,6 +66,7 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <stddef.h>
+#include <rte_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -91,6 +92,7 @@ enum rte_timer_type {
  * config) and an owner (the id of the lcore that owns the timer).
  */
 union rte_timer_status {
+	RTE_STD_C11
 	struct {
 		uint16_t state;  /**< Stop, pending, running, config. */
 		int16_t owner;   /**< The lcore that owns the timer. */
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 538adb0b..415ffc6e 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -39,25 +39,16 @@ EXPORT_MAP := rte_vhost_version.map
 LIBABIVER := 3
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
-ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
 CFLAGS += -I vhost_user
 LDLIBS += -lpthread
-else
-CFLAGS += -I vhost_cuse
-LDLIBS += -lfuse
-endif
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c
-ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y)
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c
-else
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c
-endif
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
+				   virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile
deleted file mode 100644
index 3140e8bf..00000000
--- a/lib/librte_vhost/eventfd_link/Makefile
+++ /dev/null
@@ -1,41 +0,0 @@
-#   BSD LICENSE
-#
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms, with or without
-#   modification, are permitted provided that the following conditions
-#   are met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above copyright
-#       notice, this list of conditions and the following disclaimer in
-#       the documentation and/or other materials provided with the
-#       distribution.
-#     * Neither the name of Intel Corporation nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-
-obj-m += eventfd_link.o
-
-
-all:
-	make -C $(RTE_KERNELDIR) M=$(PWD) modules
-
-clean:
-	make -C $(RTE_KERNELDIR) M=$(PWD) clean
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c
deleted file mode 100644
index 4b05b5a8..00000000
--- a/lib/librte_vhost/eventfd_link/eventfd_link.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*-
- * GPL LICENSE SUMMARY
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *   General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *   The full GNU General Public License is included in this distribution
- *   in the file called LICENSE.GPL.
- *
- *   Contact Information:
- *   Intel Corporation
- */
-
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/file.h>
-#include <linux/fdtable.h>
-#include <linux/syscalls.h>
-
-#include "eventfd_link.h"
-
-
-/*
- * get_files_struct is copied from fs/file.c
- */
-struct files_struct *
-get_files_struct(struct task_struct *task)
-{
-	struct files_struct *files;
-
-	task_lock(task);
-	files = task->files;
-	if (files)
-		atomic_inc(&files->count);
-	task_unlock(task);
-
-	return files;
-}
-
-/*
- * put_files_struct is extracted from fs/file.c
- */
-void
-put_files_struct(struct files_struct *files)
-{
-	if (atomic_dec_and_test(&files->count))
-		BUG();
-}
-
-static struct file *
-fget_from_files(struct files_struct *files, unsigned fd)
-{
-	struct file *file;
-
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file) {
-		if (file->f_mode & FMODE_PATH ||
-			!atomic_long_inc_not_zero(&file->f_count)) {
-
-			file = NULL;
-		}
-	}
-	rcu_read_unlock();
-
-	return file;
-}
-
-static long
-eventfd_link_ioctl_copy2(unsigned long arg)
-{
-	void __user *argp = (void __user *) arg;
-	struct task_struct *task_target = NULL;
-	struct file *file;
-	struct files_struct *files;
-	struct eventfd_copy2 eventfd_copy2;
-	long ret = -EFAULT;
-
-	if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2)))
-		goto out;
-
-	/*
-	 * Find the task struct for the target pid
-	 */
-	ret = -ESRCH;
-
-	task_target =
-		get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID);
-	if (task_target == NULL) {
-		pr_info("Unable to find pid %d\n", eventfd_copy2.pid);
-		goto out;
-	}
-
-	ret = -ESTALE;
-	files = get_files_struct(task_target);
-	if (files == NULL) {
-		pr_info("Failed to get target files struct\n");
-		goto out_task;
-	}
-
-	ret = -EBADF;
-	file = fget_from_files(files, eventfd_copy2.fd);
-	put_files_struct(files);
-
-	if (file == NULL) {
-		pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd);
-		goto out_task;
-	}
-
-	/*
-	 * Install the file struct from the target process into the
-	 * newly allocated file desciptor of the source process.
-	 */
-	ret = get_unused_fd_flags(eventfd_copy2.flags);
-	if (ret < 0) {
-		fput(file);
-		goto out_task;
-	}
-	fd_install(ret, file);
-
-out_task:
-	put_task_struct(task_target);
-out:
-	return ret;
-}
-
-static long
-eventfd_link_ioctl_copy(unsigned long arg)
-{
-	void __user *argp = (void __user *) arg;
-	struct task_struct *task_target = NULL;
-	struct file *file;
-	struct files_struct *files;
-	struct fdtable *fdt;
-	struct eventfd_copy eventfd_copy;
-	long ret = -EFAULT;
-
-	if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy)))
-		goto out;
-
-	/*
-	 * Find the task struct for the target pid
-	 */
-	ret = -ESRCH;
-
-	task_target =
-		get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID);
-	if (task_target == NULL) {
-		pr_info("Unable to find pid %d\n", eventfd_copy.target_pid);
-		goto out;
-	}
-
-	ret = -ESTALE;
-	files = get_files_struct(current);
-	if (files == NULL) {
-		pr_info("Failed to get current files struct\n");
-		goto out_task;
-	}
-
-	ret = -EBADF;
-	file = fget_from_files(files, eventfd_copy.source_fd);
-
-	if (file == NULL) {
-		pr_info("Failed to get fd %d from source\n",
-			eventfd_copy.source_fd);
-		put_files_struct(files);
-		goto out_task;
-	}
-
-	/*
-	 * Release the existing eventfd in the source process
-	 */
-	spin_lock(&files->file_lock);
-	fput(file);
-	filp_close(file, files);
-	fdt = files_fdtable(files);
-	fdt->fd[eventfd_copy.source_fd] = NULL;
-	spin_unlock(&files->file_lock);
-
-	put_files_struct(files);
-
-	/*
-	 * Find the file struct associated with the target fd.
-	 */
-
-	ret = -ESTALE;
-	files = get_files_struct(task_target);
-	if (files == NULL) {
-		pr_info("Failed to get target files struct\n");
-		goto out_task;
-	}
-
-	ret = -EBADF;
-	file = fget_from_files(files, eventfd_copy.target_fd);
-	put_files_struct(files);
-
-	if (file == NULL) {
-		pr_info("Failed to get fd %d from target\n",
-			eventfd_copy.target_fd);
-		goto out_task;
-	}
-
-	/*
-	 * Install the file struct from the target process into the
-	 * file desciptor of the source process,
-	 */
-
-	fd_install(eventfd_copy.source_fd, file);
-	ret = 0;
-
-out_task:
-	put_task_struct(task_target);
-out:
-	return ret;
-}
-
-static long
-eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg)
-{
-	long ret = -ENOIOCTLCMD;
-
-	switch (ioctl) {
-	case EVENTFD_COPY:
-		ret = eventfd_link_ioctl_copy(arg);
-		break;
-	case EVENTFD_COPY2:
-		ret = eventfd_link_ioctl_copy2(arg);
-		break;
-	}
-
-	return ret;
-}
-
-static const struct file_operations eventfd_link_fops = {
-	.owner = THIS_MODULE,
-	.unlocked_ioctl = eventfd_link_ioctl,
-};
-
-
-static struct miscdevice eventfd_link_misc = {
-	.minor = MISC_DYNAMIC_MINOR,
-	.name = "eventfd-link",
-	.fops = &eventfd_link_fops,
-};
-
-static int __init
-eventfd_link_init(void)
-{
-	return misc_register(&eventfd_link_misc);
-}
-
-module_init(eventfd_link_init);
-
-static void __exit
-eventfd_link_exit(void)
-{
-	misc_deregister(&eventfd_link_misc);
-}
-
-module_exit(eventfd_link_exit);
-
-MODULE_VERSION("0.0.1");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Anthony Fee");
-MODULE_DESCRIPTION("Link eventfd");
-MODULE_ALIAS("devname:eventfd-link");
diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h
deleted file mode 100644
index 5ebc20b8..00000000
--- a/lib/librte_vhost/eventfd_link/eventfd_link.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*-
- *  This file is provided under a dual BSD/GPLv2 license.  When using or
- *  redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *   General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *   The full GNU General Public License is included in this distribution
- *   in the file called LICENSE.GPL.
- *
- *   Contact Information:
- *   Intel Corporation
- *
- * BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *   Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- *   Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- *   Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _EVENTFD_LINK_H_
-#define _EVENTFD_LINK_H_
-
-/*
- * arguements for the EVENTFD_COPY ioctl
- */
-struct eventfd_copy {
-	unsigned target_fd; /* fd in the target pid */
-	unsigned source_fd; /* fd in the calling pid */
-	pid_t target_pid; /* pid of the target pid */
-};
-
-/*
- * ioctl to copy an fd entry in calling process to an fd in a target process
- * NOTE: this one should be
- * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually
- */
-#define EVENTFD_COPY 1
-
-/*
- * arguments for the EVENTFD_COPY2 ioctl
- */
-struct eventfd_copy2 {
-	unsigned fd; /* fd to steal */
-	pid_t pid; /* pid of the process to steal from */
-	unsigned flags; /* flags to allocate new fd with */
-};
-
-/*
- * ioctl to copy an fd entry from the target process into newly allocated
- * fd in the calling process
- */
-#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2)
-
-#endif /* _EVENTFD_LINK_H_ */
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/fd_man.c
index 2d3eeb7d..2d3eeb7d 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/fd_man.h
index bd66ed1c..bd66ed1c 100644
--- a/lib/librte_vhost/vhost_user/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py
deleted file mode 100755
index e6a2cc9d..00000000
--- a/lib/librte_vhost/libvirt/qemu-wrap.py
+++ /dev/null
@@ -1,387 +0,0 @@
-#!/usr/bin/python
-#/*
-# *   BSD LICENSE
-# *
-# *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
-# *   All rights reserved.
-# *
-# *   Redistribution and use in source and binary forms, with or without
-# *   modification, are permitted provided that the following conditions
-# *   are met:
-# *
-# *     * Redistributions of source code must retain the above copyright
-# *       notice, this list of conditions and the following disclaimer.
-# *     * Redistributions in binary form must reproduce the above copyright
-# *       notice, this list of conditions and the following disclaimer in
-# *       the documentation and/or other materials provided with the
-# *       distribution.
-# *     * Neither the name of Intel Corporation nor the names of its
-# *       contributors may be used to endorse or promote products derived
-# *       from this software without specific prior written permission.
-# *
-# *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# */
-
-#####################################################################
-# This script is designed to modify the call to the QEMU emulator
-# to support userspace vhost when starting a guest machine through
-# libvirt with vhost enabled. The steps to enable this are as follows
-# and should be run as root:
-#
-# 1. Place this script in a libvirtd's binary search PATH ($PATH)
-#    A good location would be in the same directory that the QEMU
-#    binary is located
-#
-# 2. Ensure that the script has the same owner/group and file
-#    permissions as the QEMU binary
-#
-# 3. Update the VM xml file using "virsh edit VM.xml"
-#
-#    3.a) Set the VM to use the launch script
-#
-#	Set the emulator path contained in the
-#		<emulator><emulator/> tags
-#
-#	e.g replace <emulator>/usr/bin/qemu-kvm<emulator/>
-#        with    <emulator>/usr/bin/qemu-wrap.py<emulator/>
-#
-#	 3.b) Set the VM's device's to use vhost-net offload
-#
-#		<interface type="network">
-#	<model type="virtio"/>
-#	<driver name="vhost"/>
-#		<interface/>
-#
-# 4. Enable libvirt to access our userpace device file by adding it to
-#    controllers cgroup for libvirtd using the following steps
-#
-#   4.a) In /etc/libvirt/qemu.conf add/edit the following lines:
-#         1) cgroup_controllers = [ ... "devices", ... ]
-#		  2) clear_emulator_capabilities = 0
-#         3) user = "root"
-#         4) group = "root"
-#         5) cgroup_device_acl = [
-#                "/dev/null", "/dev/full", "/dev/zero",
-#                "/dev/random", "/dev/urandom",
-#                "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
-#                "/dev/rtc", "/dev/hpet", "/dev/net/tun",
-#                "/dev/<devbase-name>",
-#                "/dev/hugepages",
-#            ]
-#
-#   4.b) Disable SELinux or set to permissive mode
-#
-#   4.c) Mount cgroup device controller
-#        "mkdir /dev/cgroup"
-#        "mount -t cgroup none /dev/cgroup -o devices"
-#
-#   4.d) Set hugetlbfs_mount variable - ( Optional )
-#        VMs using userspace vhost must use hugepage backed
-#        memory. This can be enabled in the libvirt XML
-#        config by adding a memory backing section to the
-#        XML config e.g.
-#             <memoryBacking>
-#             <hugepages/>
-#             </memoryBacking>
-#        This memory backing section should be added after the
-#        <memory> and <currentMemory> sections. This will add
-#        flags "-mem-prealloc -mem-path <path>" to the QEMU
-#        command line. The hugetlbfs_mount variable can be used
-#        to override the default <path> passed through by libvirt.
-#
-#        if "-mem-prealloc" or "-mem-path <path>" are not passed
-#        through and a vhost device is detected then these options will
-#        be automatically added by this script. This script will detect
-#        the system hugetlbfs mount point to be used for <path>. The
-#        default <path> for this script can be overidden by the
-#        hugetlbfs_dir variable in the configuration section of this script.
-#
-#
-#   4.e) Restart the libvirtd system process
-#        e.g. on Fedora "systemctl restart libvirtd.service"
-#
-#
-#   4.f) Edit the Configuration Parameters section of this script
-#        to point to the correct emulator location and set any
-#        addition options
-#
-# The script modifies the libvirtd Qemu call by modifying/adding
-# options based on the configuration parameters below.
-# NOTE:
-#     emul_path and us_vhost_path must be set
-#     All other parameters are optional
-#####################################################################
-
-
-#############################################
-# Configuration Parameters
-#############################################
-#Path to QEMU binary
-emul_path = "/usr/local/bin/qemu-system-x86_64"
-
-#Path to userspace vhost device file
-# This filename should match the --dev-basename parameters of
-# the command used to launch the userspace vhost sample application e.g.
-# if the sample app lauch command is:
-#    ./build/vhost-switch ..... --dev-basename usvhost
-# then this variable should be set to:
-#   us_vhost_path = "/dev/usvhost"
-us_vhost_path = "/dev/usvhost"
-
-#List of additional user defined emulation options. These options will
-#be added to all Qemu calls
-emul_opts_user = []
-
-#List of additional user defined emulation options for vhost only.
-#These options will only be added to vhost enabled guests
-emul_opts_user_vhost = []
-
-#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs
-# Set this variable to one to enable this option for all VMs
-use_huge_all = 0
-
-#Instead of autodetecting, override the hugetlbfs directory by setting
-#this variable
-hugetlbfs_dir = ""
-
-#############################################
-
-
-#############################################
-# ****** Do Not Modify Below this Line ******
-#############################################
-
-import sys, os, subprocess
-import time
-import signal
-
-
-#List of open userspace vhost file descriptors
-fd_list = []
-
-#additional virtio device flags when using userspace vhost
-vhost_flags = [ "csum=off",
-                "gso=off",
-                "guest_tso4=off",
-                "guest_tso6=off",
-                "guest_ecn=off"
-              ]
-
-#String of the path to the Qemu process pid
-qemu_pid = "/tmp/%d-qemu.pid" % os.getpid()
-
-#############################################
-# Signal haldler to kill Qemu subprocess
-#############################################
-def kill_qemu_process(signum, stack):
-    pidfile = open(qemu_pid, 'r')
-    pid = int(pidfile.read())
-    os.killpg(pid, signal.SIGTERM)
-    pidfile.close()
-
-
-#############################################
-# Find the system hugefile mount point.
-# Note:
-# if multiple hugetlbfs mount points exist
-# then the first one found will be used
-#############################################
-def find_huge_mount():
-
-    if (len(hugetlbfs_dir)):
-        return hugetlbfs_dir
-
-    huge_mount = ""
-
-    if (os.access("/proc/mounts", os.F_OK)):
-        f = open("/proc/mounts", "r")
-        line = f.readline()
-        while line:
-            line_split = line.split(" ")
-            if line_split[2] == 'hugetlbfs':
-                huge_mount = line_split[1]
-                break
-            line = f.readline()
-    else:
-        print "/proc/mounts not found"
-        exit (1)
-
-    f.close
-    if len(huge_mount) == 0:
-        print "Failed to find hugetlbfs mount point"
-        exit (1)
-
-    return huge_mount
-
-
-#############################################
-# Get a userspace Vhost file descriptor
-#############################################
-def get_vhost_fd():
-
-    if (os.access(us_vhost_path, os.F_OK)):
-        fd = os.open( us_vhost_path, os.O_RDWR)
-    else:
-        print ("US-Vhost file %s not found" %us_vhost_path)
-        exit (1)
-
-    return fd
-
-
-#############################################
-# Check for vhostfd. if found then replace
-# with our own vhost fd and append any vhost
-# flags onto the end
-#############################################
-def modify_netdev_arg(arg):
-
-    global fd_list
-    vhost_in_use = 0
-    s = ''
-    new_opts = []
-    netdev_opts = arg.split(",")
-
-    for opt in netdev_opts:
-        #check if vhost is used
-        if "vhost" == opt[:5]:
-            vhost_in_use = 1
-        else:
-            new_opts.append(opt)
-
-    #if using vhost append vhost options
-    if vhost_in_use == 1:
-        #append vhost on option
-        new_opts.append('vhost=on')
-        #append vhostfd ption
-        new_fd = get_vhost_fd()
-        new_opts.append('vhostfd=' + str(new_fd))
-        fd_list.append(new_fd)
-
-    #concatenate all options
-    for opt in new_opts:
-        if len(s) > 0:
-			s+=','
-
-        s+=opt
-
-    return s
-
-
-#############################################
-# Main
-#############################################
-def main():
-
-    global fd_list
-    global vhost_in_use
-    new_args = []
-    num_cmd_args = len(sys.argv)
-    emul_call = ''
-    mem_prealloc_set = 0
-    mem_path_set = 0
-    num = 0;
-
-    #parse the parameters
-    while (num < num_cmd_args):
-        arg = sys.argv[num]
-
-	#Check netdev +1 parameter for vhostfd
-        if arg == '-netdev':
-            num_vhost_devs = len(fd_list)
-            new_args.append(arg)
-
-            num+=1
-            arg = sys.argv[num]
-            mod_arg = modify_netdev_arg(arg)
-            new_args.append(mod_arg)
-
-            #append vhost flags if this is a vhost device
-            # and -device is the next arg
-            # i.e -device -opt1,-opt2,...,-opt3,%vhost
-            if (num_vhost_devs < len(fd_list)):
-                num+=1
-                arg = sys.argv[num]
-                if arg == '-device':
-                    new_args.append(arg)
-                    num+=1
-                    new_arg = sys.argv[num]
-                    for flag in vhost_flags:
-                        new_arg = ''.join([new_arg,',',flag])
-                    new_args.append(new_arg)
-                else:
-                    new_args.append(arg)
-        elif arg == '-mem-prealloc':
-            mem_prealloc_set = 1
-            new_args.append(arg)
-        elif arg == '-mem-path':
-            mem_path_set = 1
-            new_args.append(arg)
-
-        else:
-            new_args.append(arg)
-
-        num+=1
-
-    #Set Qemu binary location
-    emul_call+=emul_path
-    emul_call+=" "
-
-    #Add prealloc mem options if using vhost and not already added
-    if ((len(fd_list) > 0) and (mem_prealloc_set == 0)):
-        emul_call += "-mem-prealloc "
-
-    #Add mempath mem options if using vhost and not already added
-    if ((len(fd_list) > 0) and (mem_path_set == 0)):
-        #Detect and add hugetlbfs mount point
-        mp = find_huge_mount()
-        mp = "".join(["-mem-path ", mp])
-        emul_call += mp
-        emul_call += " "
-
-    #add user options
-    for opt in emul_opts_user:
-        emul_call += opt
-        emul_call += " "
-
-    #Add add user vhost only options
-    if len(fd_list) > 0:
-        for opt in emul_opts_user_vhost:
-            emul_call += opt
-            emul_call += " "
-
-    #Add updated libvirt options
-    iter_args = iter(new_args)
-    #skip 1st arg i.e. call to this script
-    next(iter_args)
-    for arg in iter_args:
-        emul_call+=str(arg)
-        emul_call+= " "
-
-    emul_call += "-pidfile %s " % qemu_pid
-    #Call QEMU
-    process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid)
-
-    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]:
-        signal.signal(sig, kill_qemu_process)
-
-    process.wait()
-
-    #Close usvhost files
-    for fd in fd_list:
-        os.close(fd)
-    #Cleanup temporary files
-    if os.access(qemu_pid, os.F_OK):
-        os.remove(qemu_pid)
-
-if __name__ == "__main__":
-    main()
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 9caa6221..926039c5 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -53,16 +53,13 @@
 
 #define RTE_VHOST_USER_CLIENT		(1ULL << 0)
 #define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
+#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)
 
 /* Enum for virtqueue management. */
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
 /**
  * Device and vring operations.
- *
- * Make sure to set VIRTIO_DEV_RUNNING to the device flags in new_device and
- * remove it in destroy_device.
- *
  */
 struct virtio_net_device_ops {
 	int (*new_device)(int vid);		/**< Add device. */
@@ -126,9 +123,8 @@ int rte_vhost_get_numa_node(int vid);
 uint32_t rte_vhost_get_queue_num(int vid);
 
 /**
- * Get the virtio net device's ifname. For vhost-cuse, ifname is the
- * path of the char device. For vhost-user, ifname is the vhost-user
- * socket file path.
+ * Get the virtio net device's ifname, which is the vhost-user socket
+ * file path.
  *
  * @param vid
  *  virtio-net device ID
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/socket.c
index b35594d9..aaa9c270 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/socket.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -47,12 +47,10 @@
 #include <pthread.h>
 
 #include <rte_log.h>
-#include <rte_virtio_net.h>
 
 #include "fd_man.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-#include "virtio-net-user.h"
+#include "vhost.h"
+#include "vhost_user.h"
 
 /*
  * Every time rte_vhost_driver_register() is invoked, an associated
@@ -64,6 +62,7 @@ struct vhost_user_socket {
 	int connfd;
 	bool is_server;
 	bool reconnect;
+	bool dequeue_zero_copy;
 };
 
 struct vhost_user_connection {
@@ -82,7 +81,7 @@ struct vhost_user {
 #define MAX_VIRTIO_BACKLOG 128
 
 static void vhost_user_server_new_connection(int fd, void *data, int *remove);
-static void vhost_user_msg_handler(int fd, void *dat, int *remove);
+static void vhost_user_read_cb(int fd, void *dat, int *remove);
 static int vhost_user_create_client(struct vhost_user_socket *vsocket);
 
 static struct vhost_user vhost_user = {
@@ -95,31 +94,8 @@ static struct vhost_user vhost_user = {
 	.mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 
-static const char *vhost_message_str[VHOST_USER_MAX] = {
-	[VHOST_USER_NONE] = "VHOST_USER_NONE",
-	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
-	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
-	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
-	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
-	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
-	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
-	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
-	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
-	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
-	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
-	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
-	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
-	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
-	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
-	[VHOST_USER_GET_PROTOCOL_FEATURES]  = "VHOST_USER_GET_PROTOCOL_FEATURES",
-	[VHOST_USER_SET_PROTOCOL_FEATURES]  = "VHOST_USER_SET_PROTOCOL_FEATURES",
-	[VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
-	[VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
-	[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
-};
-
 /* return bytes# of read on success or negative val on failure. */
-static int
+int
 read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
 {
 	struct iovec iov;
@@ -161,37 +137,7 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
 	return ret;
 }
 
-/* return bytes# of read on success or negative val on failure. */
-static int
-read_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
-	int ret;
-
-	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
-		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
-	if (ret <= 0)
-		return ret;
-
-	if (msg && msg->size) {
-		if (msg->size > sizeof(msg->payload)) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"invalid msg size: %d\n", msg->size);
-			return -1;
-		}
-		ret = read(sockfd, &msg->payload, msg->size);
-		if (ret <= 0)
-			return ret;
-		if (ret != (int)msg->size) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"read control message failed\n");
-			return -1;
-		}
-	}
-
-	return ret;
-}
-
-static int
+int
 send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
 {
 
@@ -234,25 +180,6 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
 	return ret;
 }
 
-static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg)
-{
-	int ret;
-
-	if (!msg)
-		return 0;
-
-	msg->flags &= ~VHOST_USER_VERSION_MASK;
-	msg->flags |= VHOST_USER_VERSION;
-	msg->flags |= VHOST_USER_REPLY_MASK;
-
-	ret = send_fd_message(sockfd, (char *)msg,
-		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
-
-	return ret;
-}
-
-
 static void
 vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 {
@@ -277,12 +204,15 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
 	size = strnlen(vsocket->path, PATH_MAX);
 	vhost_set_ifname(vid, vsocket->path, size);
 
+	if (vsocket->dequeue_zero_copy)
+		vhost_enable_dequeue_zero_copy(vid);
+
 	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
 
 	vsocket->connfd = fd;
 	conn->vsocket = vsocket;
 	conn->vid = vid;
-	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_msg_handler,
+	ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
 			NULL, conn);
 	if (ret < 0) {
 		vsocket->connfd = -1;
@@ -308,134 +238,23 @@ vhost_user_server_new_connection(int fd, void *dat, int *remove __rte_unused)
 	vhost_user_add_connection(fd, vsocket);
 }
 
-/* callback when there is message on the connfd */
 static void
-vhost_user_msg_handler(int connfd, void *dat, int *remove)
+vhost_user_read_cb(int connfd, void *dat, int *remove)
 {
-	int vid;
 	struct vhost_user_connection *conn = dat;
-	struct VhostUserMsg msg;
-	uint64_t features;
+	struct vhost_user_socket *vsocket = conn->vsocket;
 	int ret;
 
-	vid = conn->vid;
-	ret = read_vhost_message(connfd, &msg);
-	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
-		struct vhost_user_socket *vsocket = conn->vsocket;
-
-		if (ret < 0)
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"vhost read message failed\n");
-		else if (ret == 0)
-			RTE_LOG(INFO, VHOST_CONFIG,
-				"vhost peer closed\n");
-		else
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"vhost read incorrect message\n");
-
+	ret = vhost_user_msg_handler(conn->vid, connfd);
+	if (ret < 0) {
 		vsocket->connfd = -1;
 		close(connfd);
 		*remove = 1;
+		vhost_destroy_device(conn->vid);
 		free(conn);
-		vhost_destroy_device(vid);
 
 		if (vsocket->reconnect)
 			vhost_user_create_client(vsocket);
-
-		return;
-	}
-
-	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
-		vhost_message_str[msg.request]);
-	switch (msg.request) {
-	case VHOST_USER_GET_FEATURES:
-		ret = vhost_get_features(vid, &features);
-		msg.payload.u64 = features;
-		msg.size = sizeof(msg.payload.u64);
-		send_vhost_message(connfd, &msg);
-		break;
-	case VHOST_USER_SET_FEATURES:
-		features = msg.payload.u64;
-		vhost_set_features(vid, &features);
-		break;
-
-	case VHOST_USER_GET_PROTOCOL_FEATURES:
-		msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
-		msg.size = sizeof(msg.payload.u64);
-		send_vhost_message(connfd, &msg);
-		break;
-	case VHOST_USER_SET_PROTOCOL_FEATURES:
-		user_set_protocol_features(vid, msg.payload.u64);
-		break;
-
-	case VHOST_USER_SET_OWNER:
-		vhost_set_owner(vid);
-		break;
-	case VHOST_USER_RESET_OWNER:
-		vhost_reset_owner(vid);
-		break;
-
-	case VHOST_USER_SET_MEM_TABLE:
-		user_set_mem_table(vid, &msg);
-		break;
-
-	case VHOST_USER_SET_LOG_BASE:
-		user_set_log_base(vid, &msg);
-
-		/* it needs a reply */
-		msg.size = sizeof(msg.payload.u64);
-		send_vhost_message(connfd, &msg);
-		break;
-	case VHOST_USER_SET_LOG_FD:
-		close(msg.fds[0]);
-		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
-		break;
-
-	case VHOST_USER_SET_VRING_NUM:
-		vhost_set_vring_num(vid, &msg.payload.state);
-		break;
-	case VHOST_USER_SET_VRING_ADDR:
-		vhost_set_vring_addr(vid, &msg.payload.addr);
-		break;
-	case VHOST_USER_SET_VRING_BASE:
-		vhost_set_vring_base(vid, &msg.payload.state);
-		break;
-
-	case VHOST_USER_GET_VRING_BASE:
-		ret = user_get_vring_base(vid, &msg.payload.state);
-		msg.size = sizeof(msg.payload.state);
-		send_vhost_message(connfd, &msg);
-		break;
-
-	case VHOST_USER_SET_VRING_KICK:
-		user_set_vring_kick(vid, &msg);
-		break;
-	case VHOST_USER_SET_VRING_CALL:
-		user_set_vring_call(vid, &msg);
-		break;
-
-	case VHOST_USER_SET_VRING_ERR:
-		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
-			close(msg.fds[0]);
-		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
-		break;
-
-	case VHOST_USER_GET_QUEUE_NUM:
-		msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
-		msg.size = sizeof(msg.payload.u64);
-		send_vhost_message(connfd, &msg);
-		break;
-
-	case VHOST_USER_SET_VRING_ENABLE:
-		user_set_vring_enable(vid, &msg.payload.state);
-		break;
-	case VHOST_USER_SEND_RARP:
-		user_send_rarp(vid, &msg);
-		break;
-
-	default:
-		break;
-
 	}
 }
 
@@ -684,6 +503,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 	memset(vsocket, 0, sizeof(struct vhost_user_socket));
 	vsocket->path = strdup(path);
 	vsocket->connfd = -1;
+	vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
 
 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
new file mode 100644
index 00000000..31825b82
--- /dev/null
+++ b/lib/librte_vhost/vhost.c
@@ -0,0 +1,430 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "vhost.h"
+
+#define VHOST_USER_F_PROTOCOL_FEATURES	30	/* bit number used in the mask below */
+
+/* Features supported by this lib; rte_vhost_feature_enable() accepts no others. */
+#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
+				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
+				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
+				(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
+				(VHOST_SUPPORTS_MQ)            | \
+				(1ULL << VIRTIO_F_VERSION_1)   | \
+				(1ULL << VHOST_F_LOG_ALL)      | \
+				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
+				(1ULL << VIRTIO_NET_F_CSUM)    | \
+				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
+				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
+
+uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;	/* current mask, adjustable at run time */
+
+struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];	/* indexed by vid; NULL = free slot */
+
+/* device ops to add/remove device to/from data core; set via the register API. */
+struct virtio_net_device_ops const *notify_ops;
+
+/* Look up device @vid; logs and returns NULL if @vid is out of range or unused. */
+struct virtio_net *
+get_device(int vid)
+{
+	struct virtio_net *dev = NULL;
+	/* vid indexes vhost_devices[]: reject ids outside the array first */
+	if (likely(vid >= 0 && vid < MAX_VHOST_DEVICE))
+		dev = vhost_devices[vid];
+	if (unlikely(!dev))
+		RTE_LOG(ERR, VHOST_CONFIG, "(%d) device not found.\n", vid);
+	return dev;
+}
+
+static void
+cleanup_vq(struct vhost_virtqueue *vq, int destroy)
+{
+	if (destroy && vq->callfd >= 0)	/* callfd is closed only on destroy */
+		close(vq->callfd);
+	if (vq->kickfd >= 0)		/* kickfd is closed whenever it is open */
+		close(vq->kickfd);
+}
+
+/*
+ * Run the backend-specific cleanup (vhost_backend_cleanup()), then close
+ * the eventfds of every allocated queue pair; @destroy goes to cleanup_vq().
+ */
+void
+cleanup_device(struct virtio_net *dev, int destroy)
+{
+	uint32_t qp;
+
+	vhost_backend_cleanup(dev);
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		uint32_t base = qp * VIRTIO_QNUM;
+		cleanup_vq(dev->virtqueue[base + VIRTIO_RXQ], destroy);
+		cleanup_vq(dev->virtqueue[base + VIRTIO_TXQ], destroy);
+	}
+}
+
+/*
+ * Free every queue pair (shadow used rings first), then the device itself.
+ */
+static void
+free_device(struct virtio_net *dev)
+{
+	uint32_t qp;
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		uint32_t base = qp * VIRTIO_QNUM;
+		struct vhost_virtqueue *rx = dev->virtqueue[base + VIRTIO_RXQ];
+		struct vhost_virtqueue *tx = dev->virtqueue[base + VIRTIO_TXQ];
+
+		rte_free(rx->shadow_used_ring);
+		rte_free(tx->shadow_used_ring);
+
+		/* both queues share one allocation whose base is the RX queue */
+		rte_free(rx);
+	}
+
+	rte_free(dev);
+}
+
+/*
+ * Put @vq into its initial state. @qp_idx is the index of the queue pair
+ * the queue belongs to; only pair 0 starts out enabled.
+ */
+static void
+init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	memset(vq, 0, sizeof(*vq));
+
+	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+	vq->backend = -1;	/* -1 indicates an inactive device */
+	TAILQ_INIT(&vq->zmbuf_list);
+
+	/* the default (first) vq pair is always enabled */
+	vq->enabled = (qp_idx == 0);
+}
+
+/* Initialise both queues of pair @qp_idx: RX first, then TX. */
+static void
+init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue **pair = dev->virtqueue + qp_idx * VIRTIO_QNUM;
+	init_vring_queue(pair[VIRTIO_RXQ], qp_idx);
+	init_vring_queue(pair[VIRTIO_TXQ], qp_idx);
+}
+
+/* Re-initialise @vq but keep its callfd, which init_vring_queue() overwrites. */
+static void
+reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
+{
+	const int saved_callfd = vq->callfd;
+
+	init_vring_queue(vq, qp_idx);
+	vq->callfd = saved_callfd;
+}
+
+/* Reset both queues of pair @qp_idx: RX first, then TX. */
+static void
+reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue **pair = dev->virtqueue + qp_idx * VIRTIO_QNUM;
+	reset_vring_queue(pair[VIRTIO_RXQ], qp_idx);
+	reset_vring_queue(pair[VIRTIO_TXQ], qp_idx);
+}
+
+/* Allocate and initialise queue pair @qp_idx; returns 0, or -1 on failure. */
+int
+alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
+{
+	struct vhost_virtqueue *pair;
+	uint32_t base_idx = qp_idx * VIRTIO_QNUM;
+
+	/* dev->virtqueue[] only has room for VHOST_MAX_QUEUE_PAIRS pairs */
+	if (qp_idx >= VHOST_MAX_QUEUE_PAIRS)
+		return -1;
+
+	/* both queues come from one allocation, with the RX queue at its base */
+	pair = rte_malloc(NULL, sizeof(*pair) * VIRTIO_QNUM, 0);
+	if (pair == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for virt qp:%u.\n", qp_idx);
+		return -1;
+	}
+	dev->virtqueue[base_idx + VIRTIO_RXQ] = pair;
+	dev->virtqueue[base_idx + VIRTIO_TXQ] = pair + VIRTIO_TXQ;
+	init_vring_queue_pair(dev, qp_idx);
+	dev->virt_qp_nb += 1;
+	return 0;
+}
+
+/*
+ * Clear the negotiated state (features, protocol features, flags) and
+ * reset every allocated queue pair. Fields such as vid, ifname and
+ * virt_qp_nb are left untouched: they stay the same until the device
+ * is removed.
+ */
+void
+reset_device(struct virtio_net *dev)
+{
+	uint32_t qp = 0;
+
+	dev->flags = 0;
+	dev->features = 0;
+	dev->protocol_features = 0;
+	while (qp < dev->virt_qp_nb)
+		reset_vring_queue_pair(dev, qp++);
+}
+
+/*
+ * Invoked when a new vhost-user connection is established (a new virtio
+ * device is being attached). Returns the new device's vid, which is its
+ * index in vhost_devices[], or -1 if allocation fails or no slot is free.
+ */
+int
+vhost_new_device(void)
+{
+	struct virtio_net *dev;
+	int i;
+
+	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+	if (dev == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to allocate memory for new dev.\n");
+		return -1;
+	}
+
+	for (i = 0; i < MAX_VHOST_DEVICE; i++)
+		if (vhost_devices[i] == NULL)
+			break;
+	if (i == MAX_VHOST_DEVICE) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to find a free slot for new device.\n");
+		rte_free(dev);	/* never stored in vhost_devices[]: free it here */
+		return -1;
+	}
+
+	vhost_devices[i] = dev;
+	dev->vid = i;
+	return i;
+}
+
+/*
+ * Invoked when the vhost-user connection is broken (the virtio device is
+ * being detached): stop a running device, release it, free its vid slot.
+ */
+void
+vhost_destroy_device(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (!dev)
+		return;
+
+	/* call the registered destroy_device op before tearing anything down */
+	if ((dev->flags & VIRTIO_DEV_RUNNING) != 0) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(vid);
+	}
+
+	cleanup_device(dev, 1);
+	free_device(dev);
+	vhost_devices[vid] = NULL;
+}
+
+/* Store up to @if_len bytes of @if_name as the device's interface name. */
+void
+vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
+{
+	struct virtio_net *dev;
+	unsigned int len;
+
+	dev = get_device(vid);
+	if (dev == NULL)
+		return;
+
+	/* keep one byte for the terminator and write it right after the copy */
+	len = if_len < sizeof(dev->ifname) ? if_len : sizeof(dev->ifname) - 1;
+	strncpy(dev->ifname, if_name, len);
+	dev->ifname[len] = '\0';
+}
+
+/* Set the dequeue_zero_copy flag of device @vid, if that device exists. */
+void
+vhost_enable_dequeue_zero_copy(int vid)
+{
+	struct virtio_net *dev;
+
+	dev = get_device(vid);
+	if (dev != NULL)
+		dev->dequeue_zero_copy = 1;
+}
+
+/* NUMA node of device @vid's structure; -1 on error or without NUMA support. */
+int
+rte_vhost_get_numa_node(int vid)
+{
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	struct virtio_net *dev = get_device(vid);
+	int node, rc;
+
+	if (!dev)
+		return -1;
+
+	/* MPOL_F_NODE | MPOL_F_ADDR: ask for the node backing address @dev */
+	rc = get_mempolicy(&node, NULL, 0, dev, MPOL_F_NODE | MPOL_F_ADDR);
+	if (rc < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to query numa node: %d\n", vid, rc);
+		return -1;
+	}
+
+	return node;
+#else
+	RTE_SET_USED(vid);
+	return -1;
+#endif
+}
+
+/* Number of queue pairs allocated for device @vid; 0 if @vid is unknown. */
+uint32_t
+rte_vhost_get_queue_num(int vid)
+{
+	const struct virtio_net *dev = get_device(vid);
+
+	if (dev != NULL)
+		return dev->virt_qp_nb;
+	return 0;
+}
+
+/* Copy device @vid's name into @buf (@len bytes), always NUL-terminated. */
+int
+rte_vhost_get_ifname(int vid, char *buf, size_t len)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	/* len == 0 would make the terminator index below wrap around */
+	if (dev == NULL || buf == NULL || len == 0)
+		return -1;
+
+	len = RTE_MIN(len, sizeof(dev->ifname));
+	strncpy(buf, dev->ifname, len);
+	buf[len - 1] = '\0';
+	return 0;
+}
+
+/* Return avail->idx - last_used_idx of queue @queue_id; 0 if it is unusable. */
+uint16_t
+rte_vhost_avail_entries(int vid, uint16_t queue_id)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+
+	/* queue_id is caller-supplied: never index past virtqueue[] */
+	if (!dev || queue_id >= RTE_DIM(dev->virtqueue))
+		return 0;
+	vq = dev->virtqueue[queue_id];
+	if (!vq || !vq->enabled)	/* slot never allocated, or queue disabled */
+		return 0;
+	/* the volatile cast forces a fresh read of avail->idx on every call */
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
+}
+
+/* Disable guest notification on @queue_id; returns 0, or -1 on any error. */
+int
+rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL || queue_id >= RTE_DIM(dev->virtqueue) ||
+	    dev->virtqueue[queue_id] == NULL)	/* bad vid, bad id, empty slot */
+		return -1;
+	if (enable) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"guest notification isn't supported.\n");
+		return -1;
+	}
+	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
+	return 0;
+}
+
+uint64_t rte_vhost_feature_get(void)
+{
+	return VHOST_FEATURES;	/* current value of the global feature mask */
+}
+
+int rte_vhost_feature_disable(uint64_t feature_mask)
+{
+	VHOST_FEATURES &= ~feature_mask;	/* clear the requested bits */
+	return 0;				/* always succeeds */
+}
+
+/* OR @feature_mask into VHOST_FEATURES; -1 if it has unsupported bits. */
+int rte_vhost_feature_enable(uint64_t feature_mask)
+{
+	if ((feature_mask & ~VHOST_SUPPORTED_FEATURES) != 0)
+		return -1;
+	VHOST_FEATURES |= feature_mask;
+	return 0;
+}
+
+/*
+ * Register ops so that we can add/remove device to data core.
+ */
+int
+rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
+{
+	notify_ops = ops;	/* the pointer is stored, the ops are not copied */
+
+	return 0;		/* always succeeds */
+}
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost.h
index 38593a29..22564f1c 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost.h
@@ -36,6 +36,7 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <sys/types.h>
+#include <sys/queue.h>
 #include <unistd.h>
 #include <linux/vhost.h>
 
@@ -61,6 +62,19 @@ struct buf_vector {
 	uint32_t desc_idx;
 };
 
+/*
+ * A structure to hold some fields needed in zero copy code path,
+ * mainly for associating an mbuf with the right desc_idx.
+ */
+struct zcopy_mbuf {
+	struct rte_mbuf *mbuf;		/* mbuf associated with desc_idx */
+	uint32_t desc_idx;		/* descriptor index the mbuf belongs to */
+	uint16_t in_use;		/* NOTE(review): looks like a busy flag - confirm */
+
+	TAILQ_ENTRY(zcopy_mbuf) next;	/* linkage for struct zcopy_mbuf_list */
+};
+TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf);	/* declares struct zcopy_mbuf_list */
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
@@ -70,8 +84,8 @@ struct vhost_virtqueue {
 	struct vring_used	*used;
 	uint32_t		size;
 
-	/* Last index used on the available ring */
-	volatile uint16_t	last_used_idx;
+	uint16_t		last_avail_idx;
+	uint16_t		last_used_idx;
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 
@@ -85,6 +99,15 @@ struct vhost_virtqueue {
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
+
+	uint16_t		nr_zmbuf;
+	uint16_t		zmbuf_size;
+	uint16_t		last_zmbuf_idx;
+	struct zcopy_mbuf	*zmbufs;
+	struct zcopy_mbuf_list	zmbuf_list;
+
+	struct vring_used_elem  *shadow_used_ring;
+	uint16_t                shadow_used_idx;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macro defined */
@@ -114,6 +137,12 @@ struct vhost_virtqueue {
  #define VIRTIO_F_VERSION_1 32
 #endif
 
+struct guest_page {
+	uint64_t guest_phys_addr;	/* guest physical address the range starts at */
+	uint64_t host_phys_addr;	/* host physical address of that same start */
+	uint64_t size;			/* length of the range */
+};
+
 /**
  * Device structure contains all configuration information relating
  * to the device.
@@ -129,6 +158,7 @@ struct virtio_net {
 	/* to tell if we need broadcast rarp packet */
 	rte_atomic16_t		broadcast_rarp;
 	uint32_t		virt_qp_nb;
+	int			dequeue_zero_copy;
 	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 	char			ifname[IF_NAME_SZ];
@@ -137,18 +167,23 @@ struct virtio_net {
 	uint64_t		log_addr;
 	struct ether_addr	mac;
 
+	uint32_t		nr_guest_pages;
+	uint32_t		max_guest_pages;
+	struct guest_page       *guest_pages;
 } __rte_cache_aligned;
 
 /**
  * Information relating to memory regions including offsets to
  * addresses in QEMUs memory file.
  */
-struct virtio_memory_regions {
-	uint64_t guest_phys_address;
-	uint64_t guest_phys_address_end;
-	uint64_t memory_size;
-	uint64_t userspace_address;
-	uint64_t address_offset;
+struct virtio_memory_region {
+	uint64_t guest_phys_addr;
+	uint64_t guest_user_addr;
+	uint64_t host_user_addr;
+	uint64_t size;
+	void	 *mmap_addr;
+	uint64_t mmap_size;
+	int fd;
 };
 
 
@@ -156,12 +191,8 @@ struct virtio_memory_regions {
  * Memory structure includes region and mapping information.
  */
 struct virtio_memory {
-	/* Base QEMU userspace address of the memory file. */
-	uint64_t base_address;
-	uint64_t mapped_address;
-	uint64_t mapped_size;
 	uint32_t nregions;
-	struct virtio_memory_regions regions[0];
+	struct virtio_memory_region regions[0];
 };
 
 
@@ -196,54 +227,66 @@ struct virtio_memory {
 #define PRINT_PACKET(device, addr, size, header) do {} while (0)
 #endif
 
-/**
- * Function to convert guest physical addresses to vhost virtual addresses.
- * This is used to convert guest virtio buffer addresses.
- */
+extern uint64_t VHOST_FEATURES;
+#define MAX_VHOST_DEVICE	1024
+extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
+
+/* Convert guest physical Address to host virtual address */
 static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
 {
-	struct virtio_memory_regions *region;
-	uint32_t regionidx;
-	uint64_t vhost_va = 0;
-
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		region = &dev->mem->regions[regionidx];
-		if ((guest_pa >= region->guest_phys_address) &&
-			(guest_pa <= region->guest_phys_address_end)) {
-			vhost_va = region->address_offset + guest_pa;
-			break;
+	struct virtio_memory_region *reg;
+	uint32_t i;
+
+	for (i = 0; i < dev->mem->nregions; i++) {
+		reg = &dev->mem->regions[i];
+		if (gpa >= reg->guest_phys_addr &&
+		    gpa <  reg->guest_phys_addr + reg->size) {
+			return gpa - reg->guest_phys_addr +
+			       reg->host_user_addr;
 		}
 	}
-	return vhost_va;
+
+	return 0;
 }
 
-struct virtio_net_device_ops const *notify_ops;
-struct virtio_net *get_device(int vid);
+/* Convert guest physical address to host physical address */
+static inline phys_addr_t __attribute__((always_inline))
+gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
+{
+	uint32_t i;
+	struct guest_page *page;
 
-int vhost_new_device(void);
-void vhost_destroy_device(int);
+	for (i = 0; i < dev->nr_guest_pages; i++) {
+		page = &dev->guest_pages[i];
 
-void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+		if (gpa >= page->guest_phys_addr &&
+		    gpa + size < page->guest_phys_addr + page->size) {
+			return gpa - page->guest_phys_addr +
+			       page->host_phys_addr;
+		}
+	}
 
-int vhost_get_features(int, uint64_t *);
-int vhost_set_features(int, uint64_t *);
+	return 0;
+}
 
-int vhost_set_vring_num(int, struct vhost_vring_state *);
-int vhost_set_vring_addr(int, struct vhost_vring_addr *);
-int vhost_set_vring_base(int, struct vhost_vring_state *);
-int vhost_get_vring_base(int, uint32_t, struct vhost_vring_state *);
+extern struct virtio_net_device_ops const *notify_ops;	/* defined in vhost.c */
+struct virtio_net *get_device(int vid);
 
-int vhost_set_vring_kick(int, struct vhost_vring_file *);
-int vhost_set_vring_call(int, struct vhost_vring_file *);
+int vhost_new_device(void);
+void cleanup_device(struct virtio_net *dev, int destroy);
+void reset_device(struct virtio_net *dev);
+void vhost_destroy_device(int);
 
-int vhost_set_backend(int, struct vhost_vring_file *);
+int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx);
 
-int vhost_set_owner(int);
-int vhost_reset_owner(int);
+void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
+void vhost_enable_dequeue_zero_copy(int vid);
 
 /*
- * Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
+ * Backend-specific cleanup.
+ *
+ * TODO: fix it; we have one backend now
  */
 void vhost_backend_cleanup(struct virtio_net *dev);
 
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
deleted file mode 100644
index 5d150116..00000000
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <fuse/cuse_lowlevel.h>
-#include <linux/limits.h>
-#include <linux/vhost.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_virtio_net.h>
-
-#include "virtio-net-cdev.h"
-#include "vhost-net.h"
-#include "eventfd_copy.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE  "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-
-/*
- * Returns vhost_cuse_device_ctx from given fuse_req_t. The
- * index is populated later when the device is added to the
- * device linked list.
- */
-static struct vhost_cuse_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
-	struct vhost_cuse_device_ctx ctx;
-	struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
-	ctx.pid = req_ctx->pid;
-	ctx.vid = (int)fi->fh;
-
-	return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
-	int vid = 0;
-
-	vid = vhost_new_device();
-	if (vid == -1) {
-		fuse_reply_err(req, EPERM);
-		return;
-	}
-
-	fi->fh = vid;
-
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"(%d) device configuration started\n", vid);
-	fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
-	int err = 0;
-	struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-
-	vhost_destroy_device(ctx.vid);
-	RTE_LOG(INFO, VHOST_CONFIG, "(%d) device released\n", ctx.vid);
-	fuse_reply_err(req, err);
-}
-
-/*
- * Boilerplate code for CUSE IOCTL
- * Implicit arguments: vid, req, result.
- */
-#define VHOST_IOCTL(func) do {	\
-	result = (func)(vid);	\
-	fuse_reply_ioctl(req, result, NULL, 0);	\
-} while (0)
-
-/*
- * Boilerplate IOCTL RETRY
- * Implicit arguments: req.
- */
-#define VHOST_IOCTL_RETRY(size_r, size_w) do {	\
-	struct iovec iov_r = { arg, (size_r) };	\
-	struct iovec iov_w = { arg, (size_w) };	\
-	fuse_reply_ioctl_retry(req, &iov_r,	\
-		(size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: vid, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_R(type, var, func) do {	\
-	if (!in_bufsz) {	\
-		VHOST_IOCTL_RETRY(sizeof(type), 0);\
-	} else {	\
-		(var) = *(const type*)in_buf;	\
-		result = func(vid, &(var));	\
-		fuse_reply_ioctl(req, result, NULL, 0);\
-	}	\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: vid, req, result, out_bufsz.
- */
-#define VHOST_IOCTL_W(type, var, func) do {	\
-	if (!out_bufsz) {	\
-		VHOST_IOCTL_RETRY(0, sizeof(type));\
-	} else {	\
-		result = (func)(vid, &(var));\
-		fuse_reply_ioctl(req, result, &(var), sizeof(type));\
-	} \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: vid, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do {	\
-	if (!in_bufsz) {	\
-		VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
-	} else {	\
-		(var1) = *(const type1*) (in_buf);	\
-		result = (func)(vid, (var1), &(var2));	\
-		fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
-	}	\
-} while (0)
-
-/*
- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
- * of IOCTL a buffer is requested to read or to write. This request is handled
- * by FUSE and the buffer is then given to CUSE.
- */
-static void
-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
-		struct fuse_file_info *fi, __rte_unused unsigned flags,
-		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
-{
-	struct vhost_cuse_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-	struct vhost_vring_file file;
-	struct vhost_vring_state state;
-	struct vhost_vring_addr addr;
-	uint64_t features;
-	uint32_t index;
-	int result = 0;
-	int vid = ctx.vid;
-
-	switch (cmd) {
-	case VHOST_NET_SET_BACKEND:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_NET_SET_BACKEND\n", ctx.vid);
-		if (!in_buf) {
-			VHOST_IOCTL_RETRY(sizeof(file), 0);
-			break;
-		}
-		file = *(const struct vhost_vring_file *)in_buf;
-		result = cuse_set_backend(ctx, &file);
-		fuse_reply_ioctl(req, result, NULL, 0);
-		break;
-
-	case VHOST_GET_FEATURES:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_GET_FEATURES\n", vid);
-		VHOST_IOCTL_W(uint64_t, features, vhost_get_features);
-		break;
-
-	case VHOST_SET_FEATURES:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_FEATURES\n", vid);
-		VHOST_IOCTL_R(uint64_t, features, vhost_set_features);
-		break;
-
-	case VHOST_RESET_OWNER:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_RESET_OWNER\n", vid);
-		VHOST_IOCTL(vhost_reset_owner);
-		break;
-
-	case VHOST_SET_OWNER:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_OWNER\n", vid);
-		VHOST_IOCTL(vhost_set_owner);
-		break;
-
-	case VHOST_SET_MEM_TABLE:
-		/*TODO fix race condition.*/
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_MEM_TABLE\n", vid);
-		static struct vhost_memory mem_temp;
-
-		switch (in_bufsz) {
-		case 0:
-			VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
-			break;
-
-		case sizeof(struct vhost_memory):
-			mem_temp = *(const struct vhost_memory *) in_buf;
-
-			if (mem_temp.nregions > 0) {
-				VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
-					(sizeof(struct vhost_memory_region) *
-						mem_temp.nregions), 0);
-			} else {
-				result = -1;
-				fuse_reply_ioctl(req, result, NULL, 0);
-			}
-			break;
-
-		default:
-			result = cuse_set_mem_table(ctx, in_buf,
-				mem_temp.nregions);
-			if (result)
-				fuse_reply_err(req, EINVAL);
-			else
-				fuse_reply_ioctl(req, result, NULL, 0);
-		}
-		break;
-
-	case VHOST_SET_VRING_NUM:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_VRING_NUM\n", vid);
-		VHOST_IOCTL_R(struct vhost_vring_state, state,
-			vhost_set_vring_num);
-		break;
-
-	case VHOST_SET_VRING_BASE:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_VRING_BASE\n", vid);
-		VHOST_IOCTL_R(struct vhost_vring_state, state,
-			vhost_set_vring_base);
-		break;
-
-	case VHOST_GET_VRING_BASE:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_GET_VRING_BASE\n", vid);
-		VHOST_IOCTL_RW(uint32_t, index,
-			struct vhost_vring_state, state, vhost_get_vring_base);
-		break;
-
-	case VHOST_SET_VRING_ADDR:
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: VHOST_SET_VRING_ADDR\n", vid);
-		VHOST_IOCTL_R(struct vhost_vring_addr, addr,
-			vhost_set_vring_addr);
-		break;
-
-	case VHOST_SET_VRING_KICK:
-	case VHOST_SET_VRING_CALL:
-		if (cmd == VHOST_SET_VRING_KICK)
-			LOG_DEBUG(VHOST_CONFIG,
-				"(%d) IOCTL: VHOST_SET_VRING_KICK\n", vid);
-		else
-			LOG_DEBUG(VHOST_CONFIG,
-				"(%d) IOCTL: VHOST_SET_VRING_CALL\n", vid);
-		if (!in_buf)
-			VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
-		else {
-			int fd;
-			file = *(const struct vhost_vring_file *)in_buf;
-			LOG_DEBUG(VHOST_CONFIG,
-				"idx:%d fd:%d\n", file.index, file.fd);
-			fd = eventfd_copy(file.fd, ctx.pid);
-			if (fd < 0) {
-				fuse_reply_ioctl(req, -1, NULL, 0);
-				result = -1;
-				break;
-			}
-			file.fd = fd;
-			if (cmd == VHOST_SET_VRING_KICK) {
-				result = vhost_set_vring_kick(vid, &file);
-				fuse_reply_ioctl(req, result, NULL, 0);
-			} else {
-				result = vhost_set_vring_call(vid, &file);
-				fuse_reply_ioctl(req, result, NULL, 0);
-			}
-		}
-		break;
-
-	default:
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) IOCTL: DOESN NOT EXIST\n", vid);
-		result = -1;
-		fuse_reply_ioctl(req, result, NULL, 0);
-	}
-
-	if (result < 0)
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: FAIL\n", vid);
-	else
-		LOG_DEBUG(VHOST_CONFIG,
-			"(%d) IOCTL: SUCCESS\n", vid);
-}
-
-/*
- * Structure handling open, release and ioctl function pointers is populated.
- */
-static const struct cuse_lowlevel_ops vhost_net_ops = {
-	.open		= vhost_net_open,
-	.release	= vhost_net_release,
-	.ioctl		= vhost_net_ioctl,
-};
-
-/*
- * cuse_info is populated and used to register the cuse device.
- * vhost_net_device_ops are also passed when the device is registered in app.
- */
-int
-rte_vhost_driver_register(const char *dev_name, uint64_t flags)
-{
-	struct cuse_info cuse_info;
-	char device_name[PATH_MAX] = "";
-	char char_device_name[PATH_MAX] = "";
-	const char *device_argv[] = { device_name };
-
-	char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
-	char fuse_opt_fore[] = FUSE_OPT_FORE;
-	char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
-	char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
-
-	if (flags) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"vhost-cuse does not support any flags so far\n");
-		return -1;
-	}
-
-	if (access(cuse_device_name, R_OK | W_OK) < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"char device %s can't be accessed, maybe not exist\n",
-			cuse_device_name);
-		return -1;
-	}
-
-	if (eventfd_init() < 0)
-		return -1;
-
-	/*
-	 * The device name is created. This is passed to QEMU so that it can
-	 * register the device with our application.
-	 */
-	snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
-	snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
-
-	/* Check if device already exists. */
-	if (access(char_device_name, F_OK) != -1) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"char device %s already exists\n", char_device_name);
-		return -1;
-	}
-
-	memset(&cuse_info, 0, sizeof(cuse_info));
-	cuse_info.dev_major = default_major;
-	cuse_info.dev_minor = default_minor;
-	cuse_info.dev_info_argc = 1;
-	cuse_info.dev_info_argv = device_argv;
-	cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
-
-	session = cuse_lowlevel_setup(3, fuse_argv,
-			&cuse_info, &vhost_net_ops, 0, NULL);
-	if (session == NULL)
-		return -1;
-
-	return 0;
-}
-
-/**
- * An empty function for unregister
- */
-int
-rte_vhost_driver_unregister(const char *dev_name __rte_unused)
-{
-	return 0;
-}
-
-/**
- * The CUSE session is launched allowing the application to receive open,
- * release and ioctl calls.
- */
-int
-rte_vhost_driver_session_start(void)
-{
-	fuse_session_loop(session);
-
-	return 0;
-}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
deleted file mode 100644
index 552be7d4..00000000
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <dirent.h>
-#include <linux/vhost.h>
-#include <linux/virtio_net.h>
-#include <fuse/cuse_lowlevel.h>
-#include <stddef.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/eventfd.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
-#include <errno.h>
-
-#include <rte_log.h>
-
-#include "rte_virtio_net.h"
-#include "vhost-net.h"
-#include "virtio-net-cdev.h"
-#include "eventfd_copy.h"
-
-/* Line size for reading maps file. */
-static const uint32_t BUFSIZE = PATH_MAX;
-
-/* Size of prot char array in procmap. */
-#define PROT_SZ 5
-
-/* Number of elements in procmap struct. */
-#define PROCMAP_SZ 8
-
-/* Structure containing information gathered from maps file. */
-struct procmap {
-	uint64_t va_start;	/* Start virtual address in file. */
-	uint64_t len;		/* Size of file. */
-	uint64_t pgoff;		/* Not used. */
-	uint32_t maj;		/* Not used. */
-	uint32_t min;		/* Not used. */
-	uint32_t ino;		/* Not used. */
-	char prot[PROT_SZ];	/* Not used. */
-	char fname[PATH_MAX];	/* File name. */
-};
-
-/*
- * Locate the file containing QEMU's memory space and
- * map it to our address space.
- */
-static int
-host_memory_map(pid_t pid, uint64_t addr,
-	uint64_t *mapped_address, uint64_t *mapped_size)
-{
-	struct dirent *dptr = NULL;
-	struct procmap procmap;
-	DIR *dp = NULL;
-	int fd;
-	int i;
-	char memfile[PATH_MAX];
-	char mapfile[PATH_MAX];
-	char procdir[PATH_MAX];
-	char resolved_path[PATH_MAX];
-	char *path = NULL;
-	FILE *fmap;
-	void *map;
-	uint8_t found = 0;
-	char line[BUFSIZE];
-	char dlm[] = "-   :   ";
-	char *str, *sp, *in[PROCMAP_SZ];
-	char *end = NULL;
-
-	/* Path where mem files are located. */
-	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
-	/* Maps file used to locate mem file. */
-	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
-
-	fmap = fopen(mapfile, "r");
-	if (fmap == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to open maps file for pid %d\n",
-			pid);
-		return -1;
-	}
-
-	/* Read through maps file until we find out base_address. */
-	while (fgets(line, BUFSIZE, fmap) != 0) {
-		str = line;
-		errno = 0;
-		/* Split line into fields. */
-		for (i = 0; i < PROCMAP_SZ; i++) {
-			in[i] = strtok_r(str, &dlm[i], &sp);
-			if ((in[i] == NULL) || (errno != 0)) {
-				fclose(fmap);
-				return -1;
-			}
-			str = NULL;
-		}
-
-		/* Convert/Copy each field as needed. */
-		procmap.va_start = strtoull(in[0], &end, 16);
-		if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.len = strtoull(in[1], &end, 16);
-		if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.pgoff = strtoull(in[3], &end, 16);
-		if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.maj = strtoul(in[4], &end, 16);
-		if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.min = strtoul(in[5], &end, 16);
-		if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		procmap.ino = strtoul(in[6], &end, 16);
-		if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
-			(errno != 0)) {
-			fclose(fmap);
-			return -1;
-		}
-
-		memcpy(&procmap.prot, in[2], PROT_SZ);
-		memcpy(&procmap.fname, in[7], PATH_MAX);
-
-		if (procmap.va_start == addr) {
-			procmap.len = procmap.len - procmap.va_start;
-			found = 1;
-			break;
-		}
-	}
-	fclose(fmap);
-
-	if (!found) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to find memory file in pid %d maps file\n",
-			pid);
-		return -1;
-	}
-
-	/* Find the guest memory file among the process fds. */
-	dp = opendir(procdir);
-	if (dp == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Cannot open pid %d process directory\n",
-			pid);
-		return -1;
-	}
-
-	found = 0;
-
-	/* Read the fd directory contents. */
-	while (NULL != (dptr = readdir(dp))) {
-		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
-				pid, dptr->d_name);
-		path = realpath(memfile, resolved_path);
-		if ((path == NULL) && (strlen(resolved_path) == 0)) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"Failed to resolve fd directory\n");
-			closedir(dp);
-			return -1;
-		}
-		if (strncmp(resolved_path, procmap.fname,
-			strnlen(procmap.fname, PATH_MAX)) == 0) {
-			found = 1;
-			break;
-		}
-	}
-
-	closedir(dp);
-
-	if (found == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to find memory file for pid %d\n",
-			pid);
-		return -1;
-	}
-	/* Open the shared memory file and map the memory into this process. */
-	fd = open(memfile, O_RDWR);
-
-	if (fd == -1) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to open %s for pid %d\n",
-			memfile, pid);
-		return -1;
-	}
-
-	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
-			MAP_POPULATE|MAP_SHARED, fd, 0);
-	close(fd);
-
-	if (map == MAP_FAILED) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Error mapping the file %s for pid %d\n",
-			memfile, pid);
-		return -1;
-	}
-
-	/* Store the memory address and size in the device data structure */
-	*mapped_address = (uint64_t)(uintptr_t)map;
-	*mapped_size = procmap.len;
-
-	LOG_DEBUG(VHOST_CONFIG,
-		"Mem File: %s->%s - Size: %llu - VA: %p\n",
-		memfile, resolved_path,
-		(unsigned long long)*mapped_size, map);
-
-	return 0;
-}
-
-int
-cuse_set_mem_table(struct vhost_cuse_device_ctx ctx,
-	const struct vhost_memory *mem_regions_addr, uint32_t nregions)
-{
-	uint64_t size = offsetof(struct vhost_memory, regions);
-	uint32_t idx, valid_regions;
-	struct virtio_memory_regions *pregion;
-	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
-		((uint64_t)(uintptr_t)mem_regions_addr + size);
-	uint64_t base_address = 0, mapped_address, mapped_size;
-	struct virtio_net *dev;
-
-	dev = get_device(ctx.vid);
-	if (dev == NULL)
-		return -1;
-
-	if (dev->mem && dev->mem->mapped_address) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
-		free(dev->mem);
-		dev->mem = NULL;
-	}
-
-	dev->mem = calloc(1, sizeof(struct virtio_memory) +
-		sizeof(struct virtio_memory_regions) * nregions);
-	if (dev->mem == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to allocate memory for dev->mem\n",
-			dev->vid);
-		return -1;
-	}
-
-	pregion = &dev->mem->regions[0];
-
-	for (idx = 0; idx < nregions; idx++) {
-		pregion[idx].guest_phys_address =
-			mem_regions[idx].guest_phys_addr;
-		pregion[idx].guest_phys_address_end =
-			pregion[idx].guest_phys_address +
-			mem_regions[idx].memory_size;
-		pregion[idx].memory_size =
-			mem_regions[idx].memory_size;
-		pregion[idx].userspace_address =
-			mem_regions[idx].userspace_addr;
-
-		LOG_DEBUG(VHOST_CONFIG,
-			"REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n",
-			idx,
-			(void *)(uintptr_t)pregion[idx].guest_phys_address,
-			(void *)(uintptr_t)pregion[idx].userspace_address,
-			pregion[idx].memory_size);
-
-		/*set the base address mapping*/
-		if (pregion[idx].guest_phys_address == 0x0) {
-			base_address =
-				pregion[idx].userspace_address;
-			/* Map VM memory file */
-			if (host_memory_map(ctx.pid, base_address,
-				&mapped_address, &mapped_size) != 0) {
-				free(dev->mem);
-				dev->mem = NULL;
-				return -1;
-			}
-			dev->mem->mapped_address = mapped_address;
-			dev->mem->base_address = base_address;
-			dev->mem->mapped_size = mapped_size;
-		}
-	}
-
-	/* Check that we have a valid base address. */
-	if (base_address == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to find base address of qemu memory file.\n");
-		free(dev->mem);
-		dev->mem = NULL;
-		return -1;
-	}
-
-	valid_regions = nregions;
-	for (idx = 0; idx < nregions; idx++) {
-		if ((pregion[idx].userspace_address < base_address) ||
-			(pregion[idx].userspace_address >
-			(base_address + mapped_size)))
-			valid_regions--;
-	}
-
-
-	if (valid_regions != nregions) {
-		valid_regions = 0;
-		for (idx = nregions; 0 != idx--; ) {
-			if ((pregion[idx].userspace_address < base_address) ||
-			(pregion[idx].userspace_address >
-			(base_address + mapped_size))) {
-				memmove(&pregion[idx], &pregion[idx + 1],
-					sizeof(struct virtio_memory_regions) *
-					valid_regions);
-			} else
-				valid_regions++;
-		}
-	}
-
-	for (idx = 0; idx < valid_regions; idx++) {
-		pregion[idx].address_offset =
-			mapped_address - base_address +
-			pregion[idx].userspace_address -
-			pregion[idx].guest_phys_address;
-	}
-	dev->mem->nregions = valid_regions;
-
-	return 0;
-}
-
-/*
- * Function to get the tap device name from the provided file descriptor and
- * save it in the device structure.
- */
-static int
-get_ifname(int vid, int tap_fd, int pid)
-{
-	int fd_tap;
-	struct ifreq ifr;
-	uint32_t ifr_size;
-	int ret;
-
-	fd_tap = eventfd_copy(tap_fd, pid);
-	if (fd_tap < 0)
-		return -1;
-
-	ret = ioctl(fd_tap, TUNGETIFF, &ifr);
-
-	if (close(fd_tap) < 0)
-		RTE_LOG(ERR, VHOST_CONFIG, "(%d) fd close failed\n", vid);
-
-	if (ret >= 0) {
-		ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
-		vhost_set_ifname(vid, ifr.ifr_name, ifr_size);
-	} else
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) TUNGETIFF ioctl failed\n", vid);
-
-	return 0;
-}
-
-int
-cuse_set_backend(struct vhost_cuse_device_ctx ctx,
-		 struct vhost_vring_file *file)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(ctx.vid);
-	if (dev == NULL)
-		return -1;
-
-	if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED)
-		get_ifname(ctx.vid, file->fd, ctx.pid);
-
-	return vhost_set_backend(ctx.vid, file);
-}
-
-void
-vhost_backend_cleanup(struct virtio_net *dev)
-{
-	/* Unmap QEMU memory file if mapped. */
-	if (dev->mem) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
-		free(dev->mem);
-		dev->mem = NULL;
-	}
-}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
new file mode 100644
index 00000000..6b83c15f
--- /dev/null
+++ b/lib/librte_vhost/vhost_user.c
@@ -0,0 +1,1033 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <assert.h>
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "vhost.h"
+#include "vhost_user.h"
+
+/*
+ * Printable names of the vhost-user requests, indexed by request code.
+ * Slots without a designated initializer below are left NULL.
+ */
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+	[VHOST_USER_NONE] = "VHOST_USER_NONE",
+	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR",
+	[VHOST_USER_GET_PROTOCOL_FEATURES]  = "VHOST_USER_GET_PROTOCOL_FEATURES",
+	[VHOST_USER_SET_PROTOCOL_FEATURES]  = "VHOST_USER_SET_PROTOCOL_FEATURES",
+	[VHOST_USER_GET_QUEUE_NUM]  = "VHOST_USER_GET_QUEUE_NUM",
+	[VHOST_USER_SET_VRING_ENABLE]  = "VHOST_USER_SET_VRING_ENABLE",
+	[VHOST_USER_SEND_RARP]  = "VHOST_USER_SEND_RARP",
+};
+
+/*
+ * Return the st_blksize that fstat() reports for fd, or (uint64_t)-1 when
+ * fstat() fails.
+ */
+static uint64_t
+get_blk_size(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) == -1)
+		return (uint64_t)-1;
+
+	return (uint64_t)st.st_blksize;
+}
+
+/*
+ * Unmap every region of dev->mem that has a host_user_addr set and close the
+ * fd stored with it. The table itself (dev->mem) is not freed here.
+ * A NULL dev or a NULL dev->mem is accepted and ignored.
+ */
+static void
+free_mem_region(struct virtio_net *dev)
+{
+	struct virtio_memory_region *region;
+	uint32_t idx;
+
+	if (dev == NULL || dev->mem == NULL)
+		return;
+
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		region = &dev->mem->regions[idx];
+
+		/* Regions that were never mapped have nothing to undo. */
+		if (!region->host_user_addr)
+			continue;
+
+		munmap(region->mmap_addr, region->mmap_size);
+		close(region->fd);
+	}
+}
+
+/*
+ * Release what the backend mapped for this device: the guest memory table
+ * (regions unmapped, table freed) and the mapping recorded in dev->log_addr.
+ * Both fields are cleared afterwards.
+ */
+void
+vhost_backend_cleanup(struct virtio_net *dev)
+{
+	if (dev->mem != NULL) {
+		free_mem_region(dev);
+		rte_free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	if (dev->log_addr != 0) {
+		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+		dev->log_addr = 0;
+	}
+}
+
+/*
+ * Handler for the set-owner request. There is nothing to set up, so it
+ * takes no arguments and always returns 0.
+ */
+static int
+vhost_user_set_owner(void)
+{
+	return 0;
+}
+
+/*
+ * Handler for the reset-owner request: take the device out of the running
+ * state if needed, then clean it up and reset it. Always returns 0.
+ */
+static int
+vhost_user_reset_owner(struct virtio_net *dev)
+{
+	/* Clear the flag before the callback so the device is seen as stopped. */
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	cleanup_device(dev, 0);
+	reset_device(dev);
+	return 0;
+}
+
+/*
+ * Return the feature bits this backend offers (VHOST_FEATURES).
+ */
+static uint64_t
+vhost_user_get_features(void)
+{
+	return VHOST_FEATURES;
+}
+
+/*
+ * Store the feature bits negotiated with the virtio device.
+ *
+ * Returns -1 when features contains a bit outside VHOST_FEATURES, 0 otherwise.
+ * The virtio-net header length kept in dev->vhost_hlen follows from the
+ * result: the mergeable-rxbuf header when VIRTIO_NET_F_MRG_RXBUF or
+ * VIRTIO_F_VERSION_1 is set, the plain header otherwise.
+ */
+static int
+vhost_user_set_features(struct virtio_net *dev, uint64_t features)
+{
+	const uint64_t mrg_rxbuf = 1ULL << VIRTIO_NET_F_MRG_RXBUF;
+	const uint64_t version_1 = 1ULL << VIRTIO_F_VERSION_1;
+
+	if (features & ~VHOST_FEATURES)
+		return -1;
+
+	dev->features = features;
+
+	if (dev->features & (mrg_rxbuf | version_1))
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
+
+	LOG_DEBUG(VHOST_CONFIG,
+		"(%d) mergeable RX buffers %s, virtio 1 %s\n",
+		dev->vid,
+		(dev->features & mrg_rxbuf) ? "on" : "off",
+		(dev->features & version_1) ? "on" : "off");
+
+	return 0;
+}
+
+/*
+ * The virtio device sends us the size of the descriptor ring.
+ *
+ * Records state->num as the size of vring state->index and allocates the
+ * per-ring buffers that are sized from it:
+ *  - the zero-copy mbuf array, only when dequeue zero copy is on; if that
+ *    allocation fails, zero copy is switched off and processing continues;
+ *  - the shadow used ring.
+ *
+ * Returns 0 on success, -1 if the shadow used ring cannot be allocated.
+ */
+static int
+vhost_user_set_vring_num(struct virtio_net *dev,
+			 struct vhost_vring_state *state)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	vq->size = state->num;
+
+	if (dev->dequeue_zero_copy) {
+		vq->nr_zmbuf = 0;
+		vq->last_zmbuf_idx = 0;
+		vq->zmbuf_size = vq->size;
+		vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size *
+					 sizeof(struct zcopy_mbuf), 0);
+		if (vq->zmbufs == NULL) {
+			RTE_LOG(WARNING, VHOST_CONFIG,
+				"failed to allocate mem for zero copy; "
+				"zero copy is force disabled\n");
+			dev->dequeue_zero_copy = 0;
+		}
+	}
+
+	/*
+	 * The request can arrive more than once for the same ring. Release
+	 * the shadow ring of the previous size first so it is not leaked;
+	 * rte_free() ignores a NULL pointer.
+	 */
+	rte_free(vq->shadow_used_ring);
+	vq->shadow_used_ring = rte_malloc(NULL,
+				vq->size * sizeof(struct vring_used_elem),
+				RTE_CACHE_LINE_SIZE);
+	if (!vq->shadow_used_ring) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to allocate memory for shadow used ring.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Move the virtio_net device and the vhost_virtqueue pair starting at index
+ * onto the NUMA node that backs the vring descriptor memory.
+ *
+ * Returns the device pointer to use from now on: a new one when the device
+ * was moved, dev itself otherwise. A failed node lookup or allocation leaves
+ * the structure concerned where it is. Without RTE_LIBRTE_VHOST_NUMA this is
+ * a no-op that returns dev.
+ */
+#ifdef RTE_LIBRTE_VHOST_NUMA
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index)
+{
+	int oldnode, newnode;
+	struct virtio_net *old_dev;
+	struct vhost_virtqueue *old_vq, *vq;
+	int ret;
+
+	/*
+	 * vq is allocated on pairs, we should try to do realloc
+	 * on first queue of one queue pair only.
+	 */
+	if (index % VIRTIO_QNUM != 0)
+		return dev;
+
+	old_dev = dev;
+	vq = old_vq = dev->virtqueue[index];
+
+	/* newnode: the node holding the descriptor ring (old_vq->desc). */
+	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
+			    MPOL_F_NODE | MPOL_F_ADDR);
+
+	/* check if we need to reallocate vq */
+	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
+			     MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get vq numa information.\n");
+		return dev;
+	}
+	if (oldnode != newnode) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"reallocate vq from %d to %d node\n", oldnode, newnode);
+		/*
+		 * VIRTIO_QNUM queues are copied from old_vq in one go --
+		 * presumably the pair was allocated as one array; confirm
+		 * against the queue pair allocator.
+		 */
+		vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
+				       newnode);
+		if (!vq)
+			return dev;
+
+		memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
+		rte_free(old_vq);
+	}
+
+	/* check if we need to reallocate dev */
+	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
+			    MPOL_F_NODE | MPOL_F_ADDR);
+	if (ret) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Unable to get dev numa information.\n");
+		goto out;
+	}
+	if (oldnode != newnode) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"reallocate dev from %d to %d node\n",
+			oldnode, newnode);
+		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
+		if (!dev) {
+			/* Keep the old device; the vq pointers are still updated below. */
+			dev = old_dev;
+			goto out;
+		}
+
+		memcpy(dev, old_dev, sizeof(*dev));
+		rte_free(old_dev);
+	}
+
+out:
+	/* Publish the (possibly new) queue pair and device pointers. */
+	dev->virtqueue[index] = vq;
+	dev->virtqueue[index + 1] = vq + 1;
+	vhost_devices[dev->vid] = dev;
+
+	return dev;
+}
+#else
+static struct virtio_net*
+numa_realloc(struct virtio_net *dev, int index __rte_unused)
+{
+	return dev;
+}
+#endif
+
+/*
+ * Translate a QEMU virtual address into the matching address of this process.
+ *
+ * The address is looked up in the regions of dev->mem; the first region whose
+ * [guest_user_addr, guest_user_addr + size) range contains qva is used.
+ * Returns 0 when no region contains it.
+ */
+static uint64_t
+qva_to_vva(struct virtio_net *dev, uint64_t qva)
+{
+	struct virtio_memory_region *region;
+	uint32_t idx;
+
+	for (idx = 0; idx < dev->mem->nregions; idx++) {
+		region = &dev->mem->regions[idx];
+
+		if (qva < region->guest_user_addr)
+			continue;
+		if (qva >= region->guest_user_addr + region->size)
+			continue;
+
+		return qva - region->guest_user_addr + region->host_user_addr;
+	}
+
+	return 0;
+}
+
+/*
+ * The virtio device sends us the desc, used and avail ring addresses.
+ * This function then converts these to our address space.
+ *
+ * Returns 0 on success. Returns -1 when no memory table has been set yet
+ * (dev->mem is NULL) or when one of the three ring addresses does not fall
+ * into any known memory region.
+ */
+static int
+vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
+{
+	struct vhost_virtqueue *vq;
+
+	if (dev->mem == NULL)
+		return -1;
+
+	/* addr->index refers to the queue index. The txq 1, rxq is 0. */
+	vq = dev->virtqueue[addr->index];
+
+	/* The addresses are converted from QEMU virtual to Vhost virtual. */
+	vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
+			addr->desc_user_addr);
+	if (vq->desc == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to find desc ring address.\n",
+			dev->vid);
+		return -1;
+	}
+
+	/*
+	 * numa_realloc() may hand back a different device and install a
+	 * different virtqueue, so both pointers are fetched again.
+	 */
+	dev = numa_realloc(dev, addr->index);
+	vq = dev->virtqueue[addr->index];
+
+	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
+			addr->avail_user_addr);
+	if (vq->avail == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to find avail ring address.\n",
+			dev->vid);
+		return -1;
+	}
+
+	vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
+			addr->used_user_addr);
+	if (vq->used == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to find used ring address.\n",
+			dev->vid);
+		return -1;
+	}
+
+	/* Resynchronise both local indexes with the used index found in the ring. */
+	if (vq->last_used_idx != vq->used->idx) {
+		RTE_LOG(WARNING, VHOST_CONFIG,
+			"last_used_idx (%u) and vq->used->idx (%u) mismatches; "
+			"some packets maybe resent for Tx and dropped for Rx\n",
+			vq->last_used_idx, vq->used->idx);
+		vq->last_used_idx  = vq->used->idx;
+		vq->last_avail_idx = vq->used->idx;
+	}
+
+	vq->log_guest_addr = addr->log_guest_addr;
+
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
+			dev->vid, vq->desc);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
+			dev->vid, vq->avail);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
+			dev->vid, vq->used);
+	LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
+			dev->vid, vq->log_guest_addr);
+
+	return 0;
+}
+
+/*
+ * The virtio device sends us the ring index to start from: state->num is
+ * stored as both the last used and the last avail index of vring
+ * state->index. Always returns 0.
+ */
+static int
+vhost_user_set_vring_base(struct virtio_net *dev,
+			  struct vhost_vring_state *state)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	vq->last_used_idx  = state->num;
+	vq->last_avail_idx = state->num;
+
+	return 0;
+}
+
+/*
+ * Record one guest-physical -> host-physical chunk in dev->guest_pages.
+ *
+ * A chunk whose host physical address starts exactly where the last recorded
+ * entry ends is merged into that entry. Otherwise it takes a new slot, and
+ * the array is doubled first when it is full.
+ *
+ * If the array cannot be grown, an error is logged, the chunk is dropped and
+ * the entries recorded so far are left untouched.
+ */
+static void
+add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
+		   uint64_t host_phys_addr, uint64_t size)
+{
+	struct guest_page *page, *last_page;
+
+	/* Merging needs no free slot, so try it before growing the array. */
+	if (dev->nr_guest_pages > 0) {
+		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
+		/* merge if the two pages are continuous */
+		if (host_phys_addr == last_page->host_phys_addr +
+				      last_page->size) {
+			last_page->size += size;
+			return;
+		}
+	}
+
+	if (dev->nr_guest_pages == dev->max_guest_pages) {
+		/*
+		 * Grow through a temporary: storing realloc()'s result
+		 * straight into dev->guest_pages would lose the old array
+		 * and leave a NULL pointer behind on failure.
+		 */
+		page = realloc(dev->guest_pages,
+			       2 * dev->max_guest_pages * sizeof(*page));
+		if (page == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"(%d) failed to grow the guest pages array\n",
+				dev->vid);
+			return;
+		}
+		dev->guest_pages = page;
+		dev->max_guest_pages *= 2;
+	}
+
+	page = &dev->guest_pages[dev->nr_guest_pages++];
+	page->guest_phys_addr = guest_phys_addr;
+	page->host_phys_addr  = host_phys_addr;
+	page->size = size;
+}
+
+/*
+ * Record the guest-physical -> host-physical mapping of one memory region,
+ * at most one page_size chunk at a time.
+ *
+ * The first chunk only runs up to the next page_size boundary of the guest
+ * physical address, so the chunks after it start on a boundary. Every chunk
+ * is clamped to what is left of the region.
+ */
+static void
+add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg,
+		uint64_t page_size)
+{
+	uint64_t reg_size = reg->size;
+	uint64_t host_user_addr  = reg->host_user_addr;
+	uint64_t guest_phys_addr = reg->guest_phys_addr;
+	uint64_t host_phys_addr;
+	uint64_t size;
+
+	host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)host_user_addr);
+	size = page_size - (guest_phys_addr & (page_size - 1));
+	size = RTE_MIN(size, reg_size);
+
+	add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+	host_user_addr  += size;
+	guest_phys_addr += size;
+	reg_size -= size;
+
+	while (reg_size > 0) {
+		/*
+		 * reg_size is unsigned: taking a whole page_size off a
+		 * shorter tail would wrap around and keep the loop walking
+		 * past the end of the region.
+		 */
+		size = RTE_MIN(reg_size, page_size);
+		host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t)
+						  host_user_addr);
+		add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+
+		host_user_addr  += size;
+		guest_phys_addr += size;
+		reg_size -= size;
+	}
+}
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+/*
+ * Log every entry of dev->guest_pages at INFO level. Only built when
+ * RTE_LIBRTE_VHOST_DEBUG is defined; otherwise the call expands to nothing.
+ */
+static void
+dump_guest_pages(struct virtio_net *dev)
+{
+	uint32_t i;
+	struct guest_page *page;
+
+	for (i = 0; i < dev->nr_guest_pages; i++) {
+		page = &dev->guest_pages[i];
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"guest physical page region %u\n"
+			"\t guest_phys_addr: %" PRIx64 "\n"
+			"\t host_phys_addr : %" PRIx64 "\n"
+			"\t size           : %" PRIx64 "\n",
+			i,
+			page->guest_phys_addr,
+			page->host_phys_addr,
+			page->size);
+	}
+}
+#else
+#define dump_guest_pages(dev)
+#endif
+
+/*
+ * Install the guest memory table carried by a set-mem-table request.
+ *
+ * A running device is stopped first and any previous table is unmapped and
+ * freed. Each region described in the message is then mmap()ed from the fd
+ * that came with it, and its guest-physical -> host-physical pages are
+ * recorded in dev->guest_pages.
+ *
+ * Returns 0 on success. Returns -1 if an allocation, the block size lookup
+ * or an mmap() fails; dev->mem is NULL in that case.
+ *
+ * NOTE(review): on the failure paths only the fds of regions that were
+ * already mapped get closed; who owns the remaining pmsg->fds[] is not
+ * visible from here -- confirm with the message reader.
+ */
+static int
+vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	struct VhostUserMemory memory = pmsg->payload.memory;
+	struct virtio_memory_region *reg;
+	void *mmap_addr;
+	uint64_t mmap_size;
+	uint64_t mmap_offset;
+	uint64_t alignment;
+	uint32_t i;
+	int fd;
+
+	/* Remove from the data plane. */
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	if (dev->mem) {
+		free_mem_region(dev);
+		rte_free(dev->mem);
+		dev->mem = NULL;
+	}
+
+	/* The page array is reused across requests; only its count is reset. */
+	dev->nr_guest_pages = 0;
+	if (!dev->guest_pages) {
+		dev->max_guest_pages = 8;
+		dev->guest_pages = malloc(dev->max_guest_pages *
+						sizeof(struct guest_page));
+		/* The array is written to for every region below. */
+		if (dev->guest_pages == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"(%d) failed to allocate memory "
+				"for dev->guest_pages\n",
+				dev->vid);
+			return -1;
+		}
+	}
+
+	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct virtio_memory) +
+		sizeof(struct virtio_memory_region) * memory.nregions, 0);
+	if (dev->mem == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%d) failed to allocate memory for dev->mem\n",
+			dev->vid);
+		return -1;
+	}
+	dev->mem->nregions = memory.nregions;
+
+	for (i = 0; i < memory.nregions; i++) {
+		fd  = pmsg->fds[i];
+		reg = &dev->mem->regions[i];
+
+		reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
+		reg->guest_user_addr = memory.regions[i].userspace_addr;
+		reg->size            = memory.regions[i].memory_size;
+		reg->fd              = fd;
+
+		/* The file is mapped from offset 0, so the offset adds to the length. */
+		mmap_offset = memory.regions[i].mmap_offset;
+		mmap_size   = reg->size + mmap_offset;
+
+		/* mmap() without flag of MAP_ANONYMOUS, should be called
+		 * with length argument aligned with hugepagesz at older
+		 * longterm version Linux, like 2.6.32 and 3.2.72, or
+		 * mmap() will fail with EINVAL.
+		 *
+		 * to avoid failure, make sure in caller to keep length
+		 * aligned.
+		 */
+		alignment = get_blk_size(fd);
+		if (alignment == (uint64_t)-1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"couldn't get hugepage size through fstat\n");
+			goto err_mmap;
+		}
+		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);
+
+		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_POPULATE, fd, 0);
+
+		if (mmap_addr == MAP_FAILED) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mmap region %u failed.\n", i);
+			goto err_mmap;
+		}
+
+		reg->mmap_addr = mmap_addr;
+		reg->mmap_size = mmap_size;
+		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
+				      mmap_offset;
+
+		add_guest_pages(dev, reg, alignment);
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"guest memory region %u, size: 0x%" PRIx64 "\n"
+			"\t guest physical addr: 0x%" PRIx64 "\n"
+			"\t guest virtual  addr: 0x%" PRIx64 "\n"
+			"\t host  virtual  addr: 0x%" PRIx64 "\n"
+			"\t mmap addr : 0x%" PRIx64 "\n"
+			"\t mmap size : 0x%" PRIx64 "\n"
+			"\t mmap align: 0x%" PRIx64 "\n"
+			"\t mmap off  : 0x%" PRIx64 "\n",
+			i, reg->size,
+			reg->guest_phys_addr,
+			reg->guest_user_addr,
+			reg->host_user_addr,
+			(uint64_t)(uintptr_t)mmap_addr,
+			mmap_size,
+			alignment,
+			mmap_offset);
+	}
+
+	dump_guest_pages(dev);
+
+	return 0;
+
+err_mmap:
+	/* Undo the regions mapped so far and drop the half-built table. */
+	free_mem_region(dev);
+	rte_free(dev->mem);
+	dev->mem = NULL;
+	return -1;
+}
+
+/*
+ * A virtqueue is ready once it exists, has its descriptor ring mapped and
+ * has had both its kick fd and its call fd set. Returns 1 if so, 0 if not.
+ */
+static int
+vq_is_ready(struct vhost_virtqueue *vq)
+{
+	if (!vq || !vq->desc)
+		return 0;
+
+	if (vq->kickfd == VIRTIO_UNINITIALIZED_EVENTFD)
+		return 0;
+
+	return vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
+}
+
+/*
+ * Check that both the RX and the TX virtqueue of every queue pair of the
+ * device are ready. Returns 1 if they all are, 0 at the first one that is
+ * not; the outcome is logged either way.
+ */
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+	uint32_t qp;
+
+	for (qp = 0; qp < dev->virt_qp_nb; qp++) {
+		struct vhost_virtqueue *rxq, *txq;
+
+		rxq = dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_RXQ];
+		txq = dev->virtqueue[qp * VIRTIO_QNUM + VIRTIO_TXQ];
+
+		if (vq_is_ready(rxq) && vq_is_ready(txq))
+			continue;
+
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"virtio is not ready for processing.\n");
+		return 0;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"virtio is now ready for processing.\n");
+	return 1;
+}
+
+/*
+ * Install the call fd of one vring.
+ *
+ * The vring index and the "no fd" flag are both packed into payload.u64.
+ * The queue pair holding the vring is allocated here if it does not exist
+ * yet; if that allocation fails the request is silently dropped. A call fd
+ * set earlier is closed before being replaced.
+ */
+static void
+vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	struct vhost_virtqueue *vq;
+	uint32_t cur_qp_idx;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ?
+			VIRTIO_INVALID_EVENTFD : pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring call idx:%d file:%d\n", file.index, file.fd);
+
+	/*
+	 * FIXME: VHOST_SET_VRING_CALL is the first per-vring message
+	 * we get, so we do vring queue pair allocation here.
+	 */
+	cur_qp_idx = file.index / VIRTIO_QNUM;
+	if (cur_qp_idx + 1 > dev->virt_qp_nb &&
+	    alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
+		return;
+
+	vq = dev->virtqueue[file.index];
+	assert(vq != NULL);
+
+	if (vq->callfd >= 0)
+		close(vq->callfd);
+
+	vq->callfd = file.fd;
+}
+
+/*
+ * Install the kick fd of one vring, closing the one it replaces.
+ *
+ * Once the fd is in place the device is checked for readiness; if it is
+ * ready and not running yet, the new_device callback is invoked and the
+ * device is marked running when that callback returns 0.
+ */
+static void
+vhost_user_set_vring_kick(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+{
+	struct vhost_vring_file file;
+	struct vhost_virtqueue *vq;
+
+	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+	file.fd = (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) ?
+			VIRTIO_INVALID_EVENTFD : pmsg->fds[0];
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring kick idx:%d file:%d\n", file.index, file.fd);
+
+	vq = dev->virtqueue[file.index];
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+	vq->kickfd = file.fd;
+
+	/* virtio_is_ready() is evaluated first: it logs the outcome. */
+	if (!virtio_is_ready(dev) || (dev->flags & VIRTIO_DEV_RUNNING))
+		return;
+
+	if (dev->dequeue_zero_copy) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"dequeue zero copy is enabled\n");
+	}
+
+	if (notify_ops->new_device(dev->vid) == 0)
+		dev->flags |= VIRTIO_DEV_RUNNING;
+}
+
+/*
+ * Free the mbuf of every entry still linked on vq->zmbuf_list, unlink the
+ * entries, then free the vq->zmbufs array.
+ */
+static void
+free_zmbufs(struct vhost_virtqueue *vq)
+{
+	struct zcopy_mbuf *entry;
+
+	/* Always take the head: each pass removes it from the list. */
+	while ((entry = TAILQ_FIRST(&vq->zmbuf_list)) != NULL) {
+		rte_pktmbuf_free(entry->mbuf);
+		TAILQ_REMOVE(&vq->zmbuf_list, entry, next);
+	}
+
+	rte_free(vq->zmbufs);
+}
+
+/*
+ * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
+ *
+ * Stops the device if it is running, reports the ring's last used index
+ * back through state->num, and releases the per-ring resources: the kick fd
+ * is closed and reset, the zero-copy mbufs are freed (when dequeue zero copy
+ * is on) and the shadow used ring is freed. Always returns 0.
+ */
+static int
+vhost_user_get_vring_base(struct virtio_net *dev,
+			  struct vhost_vring_state *state)
+{
+	struct vhost_virtqueue *vq = dev->virtqueue[state->index];
+
+	/* We have to stop the queue (virtio) if it is running. */
+	if (dev->flags & VIRTIO_DEV_RUNNING) {
+		dev->flags &= ~VIRTIO_DEV_RUNNING;
+		notify_ops->destroy_device(dev->vid);
+	}
+
+	/* Here we are safe to get the last used index */
+	state->num = vq->last_used_idx;
+
+	/* Note: the value logged under the "file:" label is state->num. */
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"vring base idx:%d file:%d\n", state->index, state->num);
+	/*
+	 * Based on current qemu vhost-user implementation, this message is
+	 * sent and only sent in vhost_vring_stop.
+	 * TODO: cleanup the vring, it isn't usable since here.
+	 */
+	if (vq->kickfd >= 0)
+		close(vq->kickfd);
+
+	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
+	if (dev->dequeue_zero_copy)
+		free_zmbufs(vq);
+	rte_free(vq->shadow_used_ring);
+	vq->shadow_used_ring = NULL;
+
+	return 0;
+}
+
+/*
+ * Handle SET_VRING_ENABLE, sent by qemu once the virtio queues are ready to
+ * work: state->num is the enable flag, state->index selects the virtqueue.
+ */
+static int
+vhost_user_set_vring_enable(struct virtio_net *dev,
+			    struct vhost_vring_state *state)
+{
+	int enable = (int)state->num;
+
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set queue enable: %d to qp idx: %d\n",
+		enable, state->index);
+
+	if (notify_ops->vring_state_changed)
+		notify_ops->vring_state_changed(dev->vid, state->index, enable);
+
+	dev->virtqueue[state->index]->enabled = enable;
+
+	return 0;
+}
+
+/* Record protocol features; a set with unsupported bits is ignored. */
+static void
+vhost_user_set_protocol_features(struct virtio_net *dev,
+				 uint64_t protocol_features)
+{
+	/* Store the value only when every bit is one we know about. */
+	if ((protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) == 0)
+		dev->protocol_features = protocol_features;
+}
+
+/* Handle SET_LOG_BASE: mmap the log fd; returns 0, or -1 on failure. */
+static int
+vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	int fd = msg->fds[0];
+	uint64_t size, off;
+	void *addr;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+		return -1;
+	}
+
+	if (msg->size != sizeof(VhostUserLog)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid log base msg size: %"PRId32" != %d\n",
+			msg->size, (int)sizeof(VhostUserLog));
+		close(fd);	/* fd is valid here: do not leak it */
+		return -1;
+	}
+
+	size = msg->payload.log.mmap_size;
+	off  = msg->payload.log.mmap_offset;
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"log mmap size: %"PRIu64", offset: %"PRIu64"\n",
+		size, off);
+
+	/*
+	 * mmap from 0 to work around a hugepage mmap bug: mmap will fail when
+	 * the offset is not page size aligned. The log then starts 'off' bytes
+	 * in, so map off + size bytes; treat a wrapping sum as an mmap failure.
+	 */
+	addr = (size + off < size) ? MAP_FAILED :
+		mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	close(fd);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+		return -1;
+	}
+
+	/* Drop the mapping left by an earlier SET_LOG_BASE, if any. */
+	if (dev->log_addr)
+		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
+	dev->log_addr = (uint64_t)(uintptr_t)addr;
+	dev->log_base = dev->log_addr + off;
+	dev->log_size = size;
+
+	return 0;
+}
+
+/*
+ * A RARP packet is constructed and broadcast to notify switches about
+ * the new location of the migrated VM, so that packets from outside will
+ * not be lost after migration.
+ *
+ * However, we don't actually "send" a RARP packet here; instead, we set
+ * the 'broadcast_rarp' flag to let rte_vhost_dequeue_burst() inject it.
+ */
+static int
+vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	uint8_t *mac = (uint8_t *)&msg->payload.u64;
+
+	RTE_LOG(DEBUG, VHOST_CONFIG,
+		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	memcpy(dev->mac.addr_bytes, mac, 6);
+
+	/*
+	 * Set the flag to inject a RARP broadcast packet at
+	 * rte_vhost_dequeue_burst().
+	 *
+	 * rte_smp_wmb() makes sure the MAC address is copied
+	 * before the flag is set.
+	 */
+	rte_smp_wmb();
+	rte_atomic16_set(&dev->broadcast_rarp, 1);
+
+	return 0;
+}
+
+/* Return the byte count of the last read on success, <= 0 otherwise. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	int nread = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+				    msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+
+	if (nread <= 0)
+		return nread;
+
+	/* A header announcing no payload is already a complete message. */
+	if (!msg || !msg->size)
+		return nread;
+
+	if (msg->size > sizeof(msg->payload)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid msg size: %d\n", msg->size);
+		return -1;
+	}
+	nread = read(sockfd, &msg->payload, msg->size);
+	if (nread <= 0)
+		return nread;
+	if (nread != (int)msg->size) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"read control message failed\n");
+		return -1;
+	}
+	return nread;
+}
+
+/*
+ * Send msg back to the peer as a reply: the version and reply bits are
+ * forced in msg->flags, then the header and msg->size payload bytes are sent.
+ * Returns 0 for a NULL msg, otherwise whatever send_fd_message() returns.
+ */
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+	if (msg == NULL)
+		return 0;
+
+	msg->flags = (msg->flags & ~VHOST_USER_VERSION_MASK) |
+		VHOST_USER_VERSION | VHOST_USER_REPLY_MASK;
+
+	return send_fd_message(sockfd, (char *)msg,
+		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+}
+
+/*
+ * Read and dispatch one message from fd; -1 on a bad device or message, else 0.
+ */
+int
+vhost_user_msg_handler(int vid, int fd)
+{
+	struct virtio_net *dev;
+	struct VhostUserMsg msg;
+	int ret;
+
+	dev = get_device(vid);
+	if (dev == NULL)
+		return -1;
+
+	ret = read_vhost_message(fd, &msg);
+	if (ret <= 0 || msg.request >= VHOST_USER_MAX) {
+		if (ret < 0)
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read message failed\n");
+		else if (ret == 0)
+			RTE_LOG(INFO, VHOST_CONFIG,
+				"vhost peer closed\n");
+		else
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"vhost read incorrect message\n");
+		return -1;
+	}
+
+	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+		vhost_message_str[msg.request]);
+	switch (msg.request) {
+	case VHOST_USER_GET_FEATURES:
+		msg.payload.u64 = vhost_user_get_features();
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(fd, &msg);
+		break;
+	case VHOST_USER_SET_FEATURES:
+		vhost_user_set_features(dev, msg.payload.u64);
+		break;
+
+	case VHOST_USER_GET_PROTOCOL_FEATURES:
+		msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(fd, &msg);
+		break;
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+		vhost_user_set_protocol_features(dev, msg.payload.u64);
+		break;
+
+	case VHOST_USER_SET_OWNER:
+		vhost_user_set_owner();
+		break;
+	case VHOST_USER_RESET_OWNER:
+		vhost_user_reset_owner(dev);
+		break;
+
+	case VHOST_USER_SET_MEM_TABLE:
+		vhost_user_set_mem_table(dev, &msg);
+		break;
+
+	case VHOST_USER_SET_LOG_BASE:
+		vhost_user_set_log_base(dev, &msg);
+		/* reply unconditionally, even if mapping the log failed */
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(fd, &msg);
+		break;
+	case VHOST_USER_SET_LOG_FD:
+		close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+		break;
+
+	case VHOST_USER_SET_VRING_NUM:
+		vhost_user_set_vring_num(dev, &msg.payload.state);
+		break;
+	case VHOST_USER_SET_VRING_ADDR:
+		vhost_user_set_vring_addr(dev, &msg.payload.addr);
+		break;
+	case VHOST_USER_SET_VRING_BASE:
+		vhost_user_set_vring_base(dev, &msg.payload.state);
+		break;
+
+	case VHOST_USER_GET_VRING_BASE:
+		ret = vhost_user_get_vring_base(dev, &msg.payload.state);
+		msg.size = sizeof(msg.payload.state);
+		send_vhost_message(fd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_KICK:
+		vhost_user_set_vring_kick(dev, &msg);
+		break;
+	case VHOST_USER_SET_VRING_CALL:
+		vhost_user_set_vring_call(dev, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ERR:
+		if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+			close(msg.fds[0]);
+		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+		break;
+
+	case VHOST_USER_GET_QUEUE_NUM:
+		msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS;
+		msg.size = sizeof(msg.payload.u64);
+		send_vhost_message(fd, &msg);
+		break;
+
+	case VHOST_USER_SET_VRING_ENABLE:
+		vhost_user_set_vring_enable(dev, &msg.payload.state);
+		break;
+	case VHOST_USER_SEND_RARP:
+		vhost_user_send_rarp(dev, &msg);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user.h
index f5332396..ba78d326 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -43,6 +43,14 @@
 
 #define VHOST_MEMORY_MAX_NREGIONS 8
 
+#define VHOST_USER_PROTOCOL_F_MQ	0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD	1
+#define VHOST_USER_PROTOCOL_F_RARP	2
+
+#define VHOST_USER_PROTOCOL_FEATURES	((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+					 (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\
+					 (1ULL << VHOST_USER_PROTOCOL_F_RARP))
+
 typedef enum VhostUserRequest {
 	VHOST_USER_NONE = 0,
 	VHOST_USER_GET_FEATURES = 1,
@@ -109,5 +117,12 @@ typedef struct VhostUserMsg {
 /* The version of the protocol we support */
 #define VHOST_USER_VERSION    0x1
 
-/*****************************************************************************/
+
+/* vhost_user.c */
+int vhost_user_msg_handler(int vid, int fd);
+
+/* socket.c */
+int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+
 #endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
deleted file mode 100644
index e7c43479..00000000
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "virtio-net-user.h"
-#include "vhost-net-user.h"
-#include "vhost-net.h"
-
-struct orig_region_map {
-	int fd;
-	uint64_t mapped_address;
-	uint64_t mapped_size;
-	uint64_t blksz;
-};
-
-#define orig_region(ptr, nregions) \
-	((struct orig_region_map *)RTE_PTR_ADD((ptr), \
-		sizeof(struct virtio_memory) + \
-		sizeof(struct virtio_memory_regions) * (nregions)))
-
-static uint64_t
-get_blk_size(int fd)
-{
-	struct stat stat;
-	int ret;
-
-	ret = fstat(fd, &stat);
-	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
-}
-
-static void
-free_mem_region(struct virtio_net *dev)
-{
-	struct orig_region_map *region;
-	unsigned int idx;
-
-	if (!dev || !dev->mem)
-		return;
-
-	region = orig_region(dev->mem, dev->mem->nregions);
-	for (idx = 0; idx < dev->mem->nregions; idx++) {
-		if (region[idx].mapped_address) {
-			munmap((void *)(uintptr_t)region[idx].mapped_address,
-					region[idx].mapped_size);
-			close(region[idx].fd);
-		}
-	}
-}
-
-void
-vhost_backend_cleanup(struct virtio_net *dev)
-{
-	if (dev->mem) {
-		free_mem_region(dev);
-		free(dev->mem);
-		dev->mem = NULL;
-	}
-	if (dev->log_addr) {
-		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
-		dev->log_addr = 0;
-	}
-}
-
-int
-user_set_mem_table(int vid, struct VhostUserMsg *pmsg)
-{
-	struct VhostUserMemory memory = pmsg->payload.memory;
-	struct virtio_memory_regions *pregion;
-	uint64_t mapped_address, mapped_size;
-	struct virtio_net *dev;
-	unsigned int idx = 0;
-	struct orig_region_map *pregion_orig;
-	uint64_t alignment;
-
-	/* unmap old memory regions one by one*/
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* Remove from the data plane. */
-	if (dev->flags & VIRTIO_DEV_RUNNING) {
-		dev->flags &= ~VIRTIO_DEV_RUNNING;
-		notify_ops->destroy_device(vid);
-	}
-
-	if (dev->mem) {
-		free_mem_region(dev);
-		free(dev->mem);
-		dev->mem = NULL;
-	}
-
-	dev->mem = calloc(1,
-		sizeof(struct virtio_memory) +
-		sizeof(struct virtio_memory_regions) * memory.nregions +
-		sizeof(struct orig_region_map) * memory.nregions);
-	if (dev->mem == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to allocate memory for dev->mem\n",
-			dev->vid);
-		return -1;
-	}
-	dev->mem->nregions = memory.nregions;
-
-	pregion_orig = orig_region(dev->mem, memory.nregions);
-	for (idx = 0; idx < memory.nregions; idx++) {
-		pregion = &dev->mem->regions[idx];
-		pregion->guest_phys_address =
-			memory.regions[idx].guest_phys_addr;
-		pregion->guest_phys_address_end =
-			memory.regions[idx].guest_phys_addr +
-			memory.regions[idx].memory_size;
-		pregion->memory_size =
-			memory.regions[idx].memory_size;
-		pregion->userspace_address =
-			memory.regions[idx].userspace_addr;
-
-		/* This is ugly */
-		mapped_size = memory.regions[idx].memory_size +
-			memory.regions[idx].mmap_offset;
-
-		/* mmap() without flag of MAP_ANONYMOUS, should be called
-		 * with length argument aligned with hugepagesz at older
-		 * longterm version Linux, like 2.6.32 and 3.2.72, or
-		 * mmap() will fail with EINVAL.
-		 *
-		 * to avoid failure, make sure in caller to keep length
-		 * aligned.
-		 */
-		alignment = get_blk_size(pmsg->fds[idx]);
-		if (alignment == (uint64_t)-1) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"couldn't get hugepage size through fstat\n");
-			goto err_mmap;
-		}
-		mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment);
-
-		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
-			mapped_size,
-			PROT_READ | PROT_WRITE, MAP_SHARED,
-			pmsg->fds[idx],
-			0);
-
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"mapped region %d fd:%d to:%p sz:0x%"PRIx64" "
-			"off:0x%"PRIx64" align:0x%"PRIx64"\n",
-			idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address,
-			mapped_size, memory.regions[idx].mmap_offset,
-			alignment);
-
-		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
-			RTE_LOG(ERR, VHOST_CONFIG,
-				"mmap qemu guest failed.\n");
-			goto err_mmap;
-		}
-
-		pregion_orig[idx].mapped_address = mapped_address;
-		pregion_orig[idx].mapped_size = mapped_size;
-		pregion_orig[idx].blksz = alignment;
-		pregion_orig[idx].fd = pmsg->fds[idx];
-
-		mapped_address +=  memory.regions[idx].mmap_offset;
-
-		pregion->address_offset = mapped_address -
-			pregion->guest_phys_address;
-
-		if (memory.regions[idx].guest_phys_addr == 0) {
-			dev->mem->base_address =
-				memory.regions[idx].userspace_addr;
-			dev->mem->mapped_address =
-				pregion->address_offset;
-		}
-
-		LOG_DEBUG(VHOST_CONFIG,
-			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
-			idx,
-			(void *)(uintptr_t)pregion->guest_phys_address,
-			(void *)(uintptr_t)pregion->userspace_address,
-			 pregion->memory_size);
-	}
-
-	return 0;
-
-err_mmap:
-	while (idx--) {
-		munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address,
-				pregion_orig[idx].mapped_size);
-		close(pregion_orig[idx].fd);
-	}
-	free(dev->mem);
-	dev->mem = NULL;
-	return -1;
-}
-
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
-{
-	return vq && vq->desc   &&
-	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
-	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
-}
-
-static int
-virtio_is_ready(struct virtio_net *dev)
-{
-	struct vhost_virtqueue *rvq, *tvq;
-	uint32_t i;
-
-	for (i = 0; i < dev->virt_qp_nb; i++) {
-		rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ];
-		tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ];
-
-		if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) {
-			RTE_LOG(INFO, VHOST_CONFIG,
-				"virtio is not ready for processing.\n");
-			return 0;
-		}
-	}
-
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"virtio is now ready for processing.\n");
-	return 1;
-}
-
-void
-user_set_vring_call(int vid, struct VhostUserMsg *pmsg)
-{
-	struct vhost_vring_file file;
-
-	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
-	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
-		file.fd = VIRTIO_INVALID_EVENTFD;
-	else
-		file.fd = pmsg->fds[0];
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"vring call idx:%d file:%d\n", file.index, file.fd);
-	vhost_set_vring_call(vid, &file);
-}
-
-
-/*
- *  In vhost-user, when we receive kick message, will test whether virtio
- *  device is ready for packet processing.
- */
-void
-user_set_vring_kick(int vid, struct VhostUserMsg *pmsg)
-{
-	struct vhost_vring_file file;
-	struct virtio_net *dev = get_device(vid);
-
-	if (!dev)
-		return;
-
-	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
-	if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
-		file.fd = VIRTIO_INVALID_EVENTFD;
-	else
-		file.fd = pmsg->fds[0];
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"vring kick idx:%d file:%d\n", file.index, file.fd);
-	vhost_set_vring_kick(vid, &file);
-
-	if (virtio_is_ready(dev) && !(dev->flags & VIRTIO_DEV_RUNNING)) {
-		if (notify_ops->new_device(vid) == 0)
-			dev->flags |= VIRTIO_DEV_RUNNING;
-	}
-}
-
-/*
- * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
- */
-int
-user_get_vring_base(int vid, struct vhost_vring_state *state)
-{
-	struct virtio_net *dev = get_device(vid);
-
-	if (dev == NULL)
-		return -1;
-	/* We have to stop the queue (virtio) if it is running. */
-	if (dev->flags & VIRTIO_DEV_RUNNING) {
-		dev->flags &= ~VIRTIO_DEV_RUNNING;
-		notify_ops->destroy_device(vid);
-	}
-
-	/* Here we are safe to get the last used index */
-	vhost_get_vring_base(vid, state->index, state);
-
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"vring base idx:%d file:%d\n", state->index, state->num);
-	/*
-	 * Based on current qemu vhost-user implementation, this message is
-	 * sent and only sent in vhost_vring_stop.
-	 * TODO: cleanup the vring, it isn't usable since here.
-	 */
-	if (dev->virtqueue[state->index]->kickfd >= 0)
-		close(dev->virtqueue[state->index]->kickfd);
-
-	dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
-
-	return 0;
-}
-
-/*
- * when virtio queues are ready to work, qemu will send us to
- * enable the virtio queue pair.
- */
-int
-user_set_vring_enable(int vid, struct vhost_vring_state *state)
-{
-	struct virtio_net *dev;
-	int enable = (int)state->num;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"set queue enable: %d to qp idx: %d\n",
-		enable, state->index);
-
-	if (notify_ops->vring_state_changed)
-		notify_ops->vring_state_changed(vid, state->index, enable);
-
-	dev->virtqueue[state->index]->enabled = enable;
-
-	return 0;
-}
-
-void
-user_set_protocol_features(int vid, uint64_t protocol_features)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
-		return;
-
-	dev->protocol_features = protocol_features;
-}
-
-int
-user_set_log_base(int vid, struct VhostUserMsg *msg)
-{
-	struct virtio_net *dev;
-	int fd = msg->fds[0];
-	uint64_t size, off;
-	void *addr;
-
-	dev = get_device(vid);
-	if (!dev)
-		return -1;
-
-	if (fd < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
-		return -1;
-	}
-
-	if (msg->size != sizeof(VhostUserLog)) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"invalid log base msg size: %"PRId32" != %d\n",
-			msg->size, (int)sizeof(VhostUserLog));
-		return -1;
-	}
-
-	size = msg->payload.log.mmap_size;
-	off  = msg->payload.log.mmap_offset;
-	RTE_LOG(INFO, VHOST_CONFIG,
-		"log mmap size: %"PRId64", offset: %"PRId64"\n",
-		size, off);
-
-	/*
-	 * mmap from 0 to workaround a hugepage mmap bug: mmap will
-	 * fail when offset is not page size aligned.
-	 */
-	addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	close(fd);
-	if (addr == MAP_FAILED) {
-		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
-		return -1;
-	}
-
-	/*
-	 * Free previously mapped log memory on occasionally
-	 * multiple VHOST_USER_SET_LOG_BASE.
-	 */
-	if (dev->log_addr) {
-		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
-	}
-	dev->log_addr = (uint64_t)(uintptr_t)addr;
-	dev->log_base = dev->log_addr + off;
-	dev->log_size = size;
-
-	return 0;
-}
-
-/*
- * An rarp packet is constructed and broadcasted to notify switches about
- * the new location of the migrated VM, so that packets from outside will
- * not be lost after migration.
- *
- * However, we don't actually "send" a rarp packet here, instead, we set
- * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
- */
-int
-user_send_rarp(int vid, struct VhostUserMsg *msg)
-{
-	struct virtio_net *dev;
-	uint8_t *mac = (uint8_t *)&msg->payload.u64;
-
-	dev = get_device(vid);
-	if (!dev)
-		return -1;
-
-	RTE_LOG(DEBUG, VHOST_CONFIG,
-		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
-		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
-	memcpy(dev->mac.addr_bytes, mac, 6);
-
-	/*
-	 * Set the flag to inject a RARP broadcast packet at
-	 * rte_vhost_dequeue_burst().
-	 *
-	 * rte_smp_wmb() is for making sure the mac is copied
-	 * before the flag is set.
-	 */
-	rte_smp_wmb();
-	rte_atomic16_set(&dev->broadcast_rarp, 1);
-
-	return 0;
-}
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
deleted file mode 100644
index 1785695b..00000000
--- a/lib/librte_vhost/virtio-net.c
+++ /dev/null
@@ -1,847 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/vhost.h>
-#include <linux/virtio_net.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#ifdef RTE_LIBRTE_VHOST_NUMA
-#include <numaif.h>
-#endif
-
-#include <sys/socket.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_memory.h>
-#include <rte_malloc.h>
-#include <rte_virtio_net.h>
-
-#include "vhost-net.h"
-
-#define MAX_VHOST_DEVICE	1024
-static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
-
-/* device ops to add/remove device to/from data core. */
-struct virtio_net_device_ops const *notify_ops;
-
-#define VHOST_USER_F_PROTOCOL_FEATURES	30
-
-/* Features supported by this lib. */
-#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
-				(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
-				(1ULL << VIRTIO_NET_F_CTRL_RX) | \
-				(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
-				(VHOST_SUPPORTS_MQ)            | \
-				(1ULL << VIRTIO_F_VERSION_1)   | \
-				(1ULL << VHOST_F_LOG_ALL)      | \
-				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
-				(1ULL << VIRTIO_NET_F_HOST_TSO4) | \
-				(1ULL << VIRTIO_NET_F_HOST_TSO6) | \
-				(1ULL << VIRTIO_NET_F_CSUM)    | \
-				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
-				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
-				(1ULL << VIRTIO_NET_F_GUEST_TSO6))
-
-static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-
-
-/*
- * Converts QEMU virtual address to Vhost virtual address. This function is
- * used to convert the ring addresses to our address space.
- */
-static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
-{
-	struct virtio_memory_regions *region;
-	uint64_t vhost_va = 0;
-	uint32_t regionidx = 0;
-
-	/* Find the region where the address lives. */
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		region = &dev->mem->regions[regionidx];
-		if ((qemu_va >= region->userspace_address) &&
-			(qemu_va <= region->userspace_address +
-			region->memory_size)) {
-			vhost_va = qemu_va + region->guest_phys_address +
-				region->address_offset -
-				region->userspace_address;
-			break;
-		}
-	}
-	return vhost_va;
-}
-
-struct virtio_net *
-get_device(int vid)
-{
-	struct virtio_net *dev = vhost_devices[vid];
-
-	if (unlikely(!dev)) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) device not found.\n", vid);
-	}
-
-	return dev;
-}
-
-static void
-cleanup_vq(struct vhost_virtqueue *vq, int destroy)
-{
-	if ((vq->callfd >= 0) && (destroy != 0))
-		close(vq->callfd);
-	if (vq->kickfd >= 0)
-		close(vq->kickfd);
-}
-
-/*
- * Unmap any memory, close any file descriptors and
- * free any memory owned by a device.
- */
-static void
-cleanup_device(struct virtio_net *dev, int destroy)
-{
-	uint32_t i;
-
-	vhost_backend_cleanup(dev);
-
-	for (i = 0; i < dev->virt_qp_nb; i++) {
-		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy);
-		cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy);
-	}
-}
-
-/*
- * Release virtqueues and device memory.
- */
-static void
-free_device(struct virtio_net *dev)
-{
-	uint32_t i;
-
-	for (i = 0; i < dev->virt_qp_nb; i++)
-		rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
-
-	rte_free(dev);
-}
-
-static void
-init_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
-{
-	memset(vq, 0, sizeof(struct vhost_virtqueue));
-
-	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
-	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
-
-	/* Backends are set to -1 indicating an inactive device. */
-	vq->backend = -1;
-
-	/* always set the default vq pair to enabled */
-	if (qp_idx == 0)
-		vq->enabled = 1;
-}
-
-static void
-init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
-	uint32_t base_idx = qp_idx * VIRTIO_QNUM;
-
-	init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
-	init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
-}
-
-static void
-reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx)
-{
-	int callfd;
-
-	callfd = vq->callfd;
-	init_vring_queue(vq, qp_idx);
-	vq->callfd = callfd;
-}
-
-static void
-reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
-	uint32_t base_idx = qp_idx * VIRTIO_QNUM;
-
-	reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx);
-	reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx);
-}
-
-static int
-alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
-{
-	struct vhost_virtqueue *virtqueue = NULL;
-	uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ;
-	uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ;
-
-	virtqueue = rte_malloc(NULL,
-			       sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0);
-	if (virtqueue == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to allocate memory for virt qp:%d.\n", qp_idx);
-		return -1;
-	}
-
-	dev->virtqueue[virt_rx_q_idx] = virtqueue;
-	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
-
-	init_vring_queue_pair(dev, qp_idx);
-
-	dev->virt_qp_nb += 1;
-
-	return 0;
-}
-
-/*
- * Reset some variables in device structure, while keeping few
- * others untouched, such as vid, ifname, virt_qp_nb: they
- * should be same unless the device is removed.
- */
-static void
-reset_device(struct virtio_net *dev)
-{
-	uint32_t i;
-
-	dev->features = 0;
-	dev->protocol_features = 0;
-	dev->flags = 0;
-
-	for (i = 0; i < dev->virt_qp_nb; i++)
-		reset_vring_queue_pair(dev, i);
-}
-
-/*
- * Function is called from the CUSE open function. The device structure is
- * initialised and a new entry is added to the device configuration linked
- * list.
- */
-int
-vhost_new_device(void)
-{
-	struct virtio_net *dev;
-	int i;
-
-	dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
-	if (dev == NULL) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to allocate memory for new dev.\n");
-		return -1;
-	}
-
-	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
-		if (vhost_devices[i] == NULL)
-			break;
-	}
-	if (i == MAX_VHOST_DEVICE) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Failed to find a free slot for new device.\n");
-		return -1;
-	}
-
-	vhost_devices[i] = dev;
-	dev->vid = i;
-
-	return i;
-}
-
-/*
- * Function is called from the CUSE release function. This function will
- * cleanup the device and remove it from device configuration linked list.
- */
-void
-vhost_destroy_device(int vid)
-{
-	struct virtio_net *dev = get_device(vid);
-
-	if (dev == NULL)
-		return;
-
-	if (dev->flags & VIRTIO_DEV_RUNNING) {
-		dev->flags &= ~VIRTIO_DEV_RUNNING;
-		notify_ops->destroy_device(vid);
-	}
-
-	cleanup_device(dev, 1);
-	free_device(dev);
-
-	vhost_devices[vid] = NULL;
-}
-
-void
-vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
-{
-	struct virtio_net *dev;
-	unsigned int len;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return;
-
-	len = if_len > sizeof(dev->ifname) ?
-		sizeof(dev->ifname) : if_len;
-
-	strncpy(dev->ifname, if_name, len);
-	dev->ifname[sizeof(dev->ifname) - 1] = '\0';
-}
-
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_OWNER
- * This function just returns success at the moment unless
- * the device hasn't been initialised.
- */
-int
-vhost_set_owner(int vid)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_RESET_OWNER
- */
-int
-vhost_reset_owner(int vid)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	if (dev->flags & VIRTIO_DEV_RUNNING) {
-		dev->flags &= ~VIRTIO_DEV_RUNNING;
-		notify_ops->destroy_device(vid);
-	}
-
-	cleanup_device(dev, 0);
-	reset_device(dev);
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_GET_FEATURES
- * The features that we support are requested.
- */
-int
-vhost_get_features(int vid, uint64_t *pu)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* Send our supported features. */
-	*pu = VHOST_FEATURES;
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_FEATURES
- * We receive the negotiated features supported by us and the virtio device.
- */
-int
-vhost_set_features(int vid, uint64_t *pu)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-	if (*pu & ~VHOST_FEATURES)
-		return -1;
-
-	dev->features = *pu;
-	if (dev->features &
-		((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) {
-		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-	} else {
-		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
-	}
-	LOG_DEBUG(VHOST_CONFIG,
-		"(%d) mergeable RX buffers %s, virtio 1 %s\n",
-		dev->vid,
-		(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
-		(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off");
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_NUM
- * The virtio device sends us the size of the descriptor ring.
- */
-int
-vhost_set_vring_num(int vid, struct vhost_vring_state *state)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
-	dev->virtqueue[state->index]->size = state->num;
-
-	return 0;
-}
-
-/*
- * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the
- * same numa node as the memory of vring descriptor.
- */
-#ifdef RTE_LIBRTE_VHOST_NUMA
-static struct virtio_net*
-numa_realloc(struct virtio_net *dev, int index)
-{
-	int oldnode, newnode;
-	struct virtio_net *old_dev;
-	struct vhost_virtqueue *old_vq, *vq;
-	int ret;
-
-	/*
-	 * vq is allocated on pairs, we should try to do realloc
-	 * on first queue of one queue pair only.
-	 */
-	if (index % VIRTIO_QNUM != 0)
-		return dev;
-
-	old_dev = dev;
-	vq = old_vq = dev->virtqueue[index];
-
-	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
-			    MPOL_F_NODE | MPOL_F_ADDR);
-
-	/* check if we need to reallocate vq */
-	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
-			     MPOL_F_NODE | MPOL_F_ADDR);
-	if (ret) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Unable to get vq numa information.\n");
-		return dev;
-	}
-	if (oldnode != newnode) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"reallocate vq from %d to %d node\n", oldnode, newnode);
-		vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0,
-				       newnode);
-		if (!vq)
-			return dev;
-
-		memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM);
-		rte_free(old_vq);
-	}
-
-	/* check if we need to reallocate dev */
-	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
-			    MPOL_F_NODE | MPOL_F_ADDR);
-	if (ret) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"Unable to get dev numa information.\n");
-		goto out;
-	}
-	if (oldnode != newnode) {
-		RTE_LOG(INFO, VHOST_CONFIG,
-			"reallocate dev from %d to %d node\n",
-			oldnode, newnode);
-		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
-		if (!dev) {
-			dev = old_dev;
-			goto out;
-		}
-
-		memcpy(dev, old_dev, sizeof(*dev));
-		rte_free(old_dev);
-	}
-
-out:
-	dev->virtqueue[index] = vq;
-	dev->virtqueue[index + 1] = vq + 1;
-	vhost_devices[dev->vid] = dev;
-
-	return dev;
-}
-#else
-static struct virtio_net*
-numa_realloc(struct virtio_net *dev, int index __rte_unused)
-{
-	return dev;
-}
-#endif
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR
- * The virtio device sends us the desc, used and avail ring addresses.
- * This function then converts these to our address space.
- */
-int
-vhost_set_vring_addr(int vid, struct vhost_vring_addr *addr)
-{
-	struct virtio_net *dev;
-	struct vhost_virtqueue *vq;
-
-	dev = get_device(vid);
-	if ((dev == NULL) || (dev->mem == NULL))
-		return -1;
-
-	/* addr->index refers to the queue index. The txq 1, rxq is 0. */
-	vq = dev->virtqueue[addr->index];
-
-	/* The addresses are converted from QEMU virtual to Vhost virtual. */
-	vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
-			addr->desc_user_addr);
-	if (vq->desc == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to find desc ring address.\n",
-			dev->vid);
-		return -1;
-	}
-
-	dev = numa_realloc(dev, addr->index);
-	vq = dev->virtqueue[addr->index];
-
-	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
-			addr->avail_user_addr);
-	if (vq->avail == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to find avail ring address.\n",
-			dev->vid);
-		return -1;
-	}
-
-	vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
-			addr->used_user_addr);
-	if (vq->used == 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to find used ring address.\n",
-			dev->vid);
-		return -1;
-	}
-
-	if (vq->last_used_idx != vq->used->idx) {
-		RTE_LOG(WARNING, VHOST_CONFIG,
-			"last_used_idx (%u) and vq->used->idx (%u) mismatches; "
-			"some packets maybe resent for Tx and dropped for Rx\n",
-			vq->last_used_idx, vq->used->idx);
-		vq->last_used_idx     = vq->used->idx;
-	}
-
-	vq->log_guest_addr = addr->log_guest_addr;
-
-	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
-			dev->vid, vq->desc);
-	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
-			dev->vid, vq->avail);
-	LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
-			dev->vid, vq->used);
-	LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
-			dev->vid, vq->log_guest_addr);
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_BASE
- * The virtio device sends us the available ring last used index.
- */
-int
-vhost_set_vring_base(int vid, struct vhost_vring_state *state)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
-	dev->virtqueue[state->index]->last_used_idx = state->num;
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_GET_VRING_BASE
- * We send the virtio device our available ring last used index.
- */
-int
-vhost_get_vring_base(int vid, uint32_t index,
-	struct vhost_vring_state *state)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	state->index = index;
-	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
-	state->num = dev->virtqueue[state->index]->last_used_idx;
-
-	return 0;
-}
-
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_CALL
- * The virtio device sends an eventfd to interrupt the guest. This fd gets
- * copied into our process space.
- */
-int
-vhost_set_vring_call(int vid, struct vhost_vring_file *file)
-{
-	struct virtio_net *dev;
-	struct vhost_virtqueue *vq;
-	uint32_t cur_qp_idx = file->index / VIRTIO_QNUM;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/*
-	 * FIXME: VHOST_SET_VRING_CALL is the first per-vring message
-	 * we get, so we do vring queue pair allocation here.
-	 */
-	if (cur_qp_idx + 1 > dev->virt_qp_nb) {
-		if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0)
-			return -1;
-	}
-
-	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
-	vq = dev->virtqueue[file->index];
-	assert(vq != NULL);
-
-	if (vq->callfd >= 0)
-		close(vq->callfd);
-
-	vq->callfd = file->fd;
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_SET_VRING_KICK
- * The virtio device sends an eventfd that it can use to notify us.
- * This fd gets copied into our process space.
- */
-int
-vhost_set_vring_kick(int vid, struct vhost_vring_file *file)
-{
-	struct virtio_net *dev;
-	struct vhost_virtqueue *vq;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
-	vq = dev->virtqueue[file->index];
-
-	if (vq->kickfd >= 0)
-		close(vq->kickfd);
-
-	vq->kickfd = file->fd;
-
-	return 0;
-}
-
-/*
- * Called from CUSE IOCTL: VHOST_NET_SET_BACKEND
- * To complete device initialisation when the virtio driver is loaded,
- * we are provided with a valid fd for a tap device (not used by us).
- * If this happens then we can add the device to a data core.
- * When the virtio driver is removed we get fd=-1.
- * At that point we remove the device from the data core.
- * The device will still exist in the device configuration linked list.
- */
-int
-vhost_set_backend(int vid, struct vhost_vring_file *file)
-{
-	struct virtio_net *dev;
-
-	dev = get_device(vid);
-	if (dev == NULL)
-		return -1;
-
-	/* file->index refers to the queue index. The txq is 1, rxq is 0. */
-	dev->virtqueue[file->index]->backend = file->fd;
-
-	/*
-	 * If the device isn't already running and both backend fds are set,
-	 * we add the device.
-	 */
-	if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
-		if (dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED &&
-		    dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED) {
-			if (notify_ops->new_device(vid) < 0)
-				return -1;
-			dev->flags |= VIRTIO_DEV_RUNNING;
-		}
-	} else if (file->fd == VIRTIO_DEV_STOPPED) {
-		dev->flags &= ~VIRTIO_DEV_RUNNING;
-		notify_ops->destroy_device(vid);
-	}
-
-	return 0;
-}
-
-int
-rte_vhost_get_numa_node(int vid)
-{
-#ifdef RTE_LIBRTE_VHOST_NUMA
-	struct virtio_net *dev = get_device(vid);
-	int numa_node;
-	int ret;
-
-	if (dev == NULL)
-		return -1;
-
-	ret = get_mempolicy(&numa_node, NULL, 0, dev,
-			    MPOL_F_NODE | MPOL_F_ADDR);
-	if (ret < 0) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%d) failed to query numa node: %d\n", vid, ret);
-		return -1;
-	}
-
-	return numa_node;
-#else
-	RTE_SET_USED(vid);
-	return -1;
-#endif
-}
-
-uint32_t
-rte_vhost_get_queue_num(int vid)
-{
-	struct virtio_net *dev = get_device(vid);
-
-	if (dev == NULL)
-		return 0;
-
-	return dev->virt_qp_nb;
-}
-
-int
-rte_vhost_get_ifname(int vid, char *buf, size_t len)
-{
-	struct virtio_net *dev = get_device(vid);
-
-	if (dev == NULL)
-		return -1;
-
-	len = RTE_MIN(len, sizeof(dev->ifname));
-
-	strncpy(buf, dev->ifname, len);
-	buf[len - 1] = '\0';
-
-	return 0;
-}
-
-uint16_t
-rte_vhost_avail_entries(int vid, uint16_t queue_id)
-{
-	struct virtio_net *dev;
-	struct vhost_virtqueue *vq;
-
-	dev = get_device(vid);
-	if (!dev)
-		return 0;
-
-	vq = dev->virtqueue[queue_id];
-	if (!vq->enabled)
-		return 0;
-
-	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
-}
-
-int
-rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
-{
-	struct virtio_net *dev = get_device(vid);
-
-	if (dev == NULL)
-		return -1;
-
-	if (enable) {
-		RTE_LOG(ERR, VHOST_CONFIG,
-			"guest notification isn't supported.\n");
-		return -1;
-	}
-
-	dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY;
-	return 0;
-}
-
-uint64_t rte_vhost_feature_get(void)
-{
-	return VHOST_FEATURES;
-}
-
-int rte_vhost_feature_disable(uint64_t feature_mask)
-{
-	VHOST_FEATURES = VHOST_FEATURES & ~feature_mask;
-	return 0;
-}
-
-int rte_vhost_feature_enable(uint64_t feature_mask)
-{
-	if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) {
-		VHOST_FEATURES = VHOST_FEATURES | feature_mask;
-		return 0;
-	}
-	return -1;
-}
-
-/*
- * Register ops so that we can add/remove device to data core.
- */
-int
-rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops)
-{
-	notify_ops = ops;
-
-	return 0;
-}
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/virtio_net.c
index 5806f99a..595f67c4 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,7 +45,7 @@
 #include <rte_sctp.h>
 #include <rte_arp.h>
 
-#include "vhost-net.h"
+#include "vhost.h"
 
 #define MAX_PKT_BURST 32
 #define VHOST_LOG_PAGE	4096
@@ -91,6 +91,56 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }
 
+static inline void __attribute__((always_inline))
+do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			  uint16_t to, uint16_t from, uint16_t size)
+{
+	rte_memcpy(&vq->used->ring[to],
+			&vq->shadow_used_ring[from],
+			size * sizeof(struct vring_used_elem));
+	vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[to]),
+			size * sizeof(struct vring_used_elem));
+}
+
+static inline void __attribute__((always_inline))
+flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
+
+	if (used_idx + vq->shadow_used_idx <= vq->size) {
+		do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+					  vq->shadow_used_idx);
+	} else {
+		uint16_t size;
+
+		/* update used ring interval [used_idx, vq->size) */
+		size = vq->size - used_idx;
+		do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+
+		/* wrapped part: interval [0, shadow_used_idx - size) */
+		do_flush_shadow_used_ring(dev, vq, 0, size,
+					  vq->shadow_used_idx - size);
+	}
+	vq->last_used_idx += vq->shadow_used_idx;
+
+	rte_smp_wmb();
+
+	*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+		sizeof(vq->used->idx));
+}
+
+static inline void __attribute__((always_inline))
+update_shadow_used_ring(struct vhost_virtqueue *vq,
+			 uint16_t desc_idx, uint16_t len)
+{
+	uint16_t i = vq->shadow_used_idx++;
+
+	vq->shadow_used_ring[i].id  = desc_idx;
+	vq->shadow_used_ring[i].len = len;
+}
+
 static void
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
@@ -136,8 +186,8 @@ copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
 }
 
 static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx)
+copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
+		  struct rte_mbuf *m, uint16_t desc_idx, uint32_t size)
 {
 	uint32_t desc_avail, desc_offset;
 	uint32_t mbuf_avail, mbuf_offset;
@@ -146,7 +196,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint64_t desc_addr;
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 
-	desc = &vq->desc[desc_idx];
+	desc = &descs[desc_idx];
 	desc_addr = gpa_to_vva(dev, desc->addr);
 	/*
 	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
@@ -183,10 +233,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 				/* Room in vring buffer is not enough */
 				return -1;
 			}
-			if (unlikely(desc->next >= vq->size))
+			if (unlikely(desc->next >= size))
 				return -1;
 
-			desc = &vq->desc[desc->next];
+			desc = &descs[desc->next];
 			desc_addr = gpa_to_vva(dev, desc->addr);
 			if (unlikely(!desc_addr))
 				return -1;
@@ -226,8 +276,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct vhost_virtqueue *vq;
 	uint16_t avail_idx, free_entries, start_idx;
 	uint16_t desc_indexes[MAX_PKT_BURST];
+	struct vring_desc *descs;
 	uint16_t used_idx;
-	uint32_t i;
+	uint32_t i, sz;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -269,7 +320,22 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint16_t desc_idx = desc_indexes[i];
 		int err;
 
-		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
+		if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
+			descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+					vq->desc[desc_idx].addr);
+			if (unlikely(!descs)) {
+				count = i;
+				break;
+			}
+			/* Read table size before desc_idx is reset. */
+			sz = vq->desc[desc_idx].len / sizeof(*descs);
+			desc_idx = 0;
+		} else {
+			descs = vq->desc;
+			sz = vq->size;
+		}
+
+		err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz);
 		if (unlikely(err)) {
 			used_idx = (start_idx + i) & (vq->size - 1);
 			vq->used->ring[used_idx].len = dev->vhost_hlen;
@@ -300,33 +366,46 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	return count;
 }
 
-static inline int
-fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
-	     uint32_t *allocated, uint32_t *vec_idx,
-	     struct buf_vector *buf_vec)
+static inline int __attribute__((always_inline))
+fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			 uint32_t avail_idx, uint32_t *vec_idx,
+			 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
+			 uint16_t *desc_chain_len)
 {
 	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
 	uint32_t vec_id = *vec_idx;
-	uint32_t len    = *allocated;
+	uint32_t len    = 0;
+	struct vring_desc *descs = vq->desc;
+
+	*desc_chain_head = idx;
+
+	if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
+		descs = (struct vring_desc *)(uintptr_t)
+					gpa_to_vva(dev, vq->desc[idx].addr);
+		if (unlikely(!descs))
+			return -1;
+
+		idx = 0;
+	}
 
 	while (1) {
 		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
 			return -1;
 
-		len += vq->desc[idx].len;
-		buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
-		buf_vec[vec_id].buf_len  = vq->desc[idx].len;
+		len += descs[idx].len;
+		buf_vec[vec_id].buf_addr = descs[idx].addr;
+		buf_vec[vec_id].buf_len  = descs[idx].len;
 		buf_vec[vec_id].desc_idx = idx;
 		vec_id++;
 
-		if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
+		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
 			break;
 
-		idx = vq->desc[idx].next;
+		idx = descs[idx].next;
 	}
 
-	*allocated = len;
-	*vec_idx   = vec_id;
+	*desc_chain_len = len;
+	*vec_idx = vec_id;
 
 	return 0;
 }
@@ -335,31 +414,34 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
  * Returns -1 on fail, 0 on success
  */
 static inline int
-reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *end, struct buf_vector *buf_vec)
+reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+				uint32_t size, struct buf_vector *buf_vec,
+				uint16_t *num_buffers, uint16_t avail_head)
 {
 	uint16_t cur_idx;
-	uint16_t avail_idx;
-	uint32_t allocated = 0;
 	uint32_t vec_idx = 0;
 	uint16_t tries = 0;
 
-	cur_idx  = vq->last_used_idx;
+	uint16_t head_idx = 0;
+	uint16_t len = 0;
 
-	while (1) {
-		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-		if (unlikely(cur_idx == avail_idx))
+	*num_buffers = 0;
+	cur_idx  = vq->last_avail_idx;
+
+	while (size > 0) {
+		if (unlikely(cur_idx == avail_head))
 			return -1;
 
-		if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
-					  &vec_idx, buf_vec) < 0))
+		if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
+						&head_idx, &len) < 0))
 			return -1;
+		len = RTE_MIN(len, size);
+		update_shadow_used_ring(vq, head_idx, len);
+		size -= len;
 
 		cur_idx++;
 		tries++;
-
-		if (allocated >= size)
-			break;
+		*num_buffers += 1;
 
 		/*
 		 * if we tried all available ring items, and still
@@ -370,77 +452,50 @@ reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
 			return -1;
 	}
 
-	*end = cur_idx;
 	return 0;
 }
 
-static inline uint32_t __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t end_idx, struct rte_mbuf *m,
-			    struct buf_vector *buf_vec)
+static inline int __attribute__((always_inline))
+copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
+			    struct buf_vector *buf_vec, uint16_t num_buffers)
 {
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint32_t vec_idx = 0;
-	uint16_t start_idx = vq->last_used_idx;
-	uint16_t cur_idx = start_idx;
 	uint64_t desc_addr;
-	uint32_t desc_chain_head;
-	uint32_t desc_chain_len;
 	uint32_t mbuf_offset, mbuf_avail;
 	uint32_t desc_offset, desc_avail;
 	uint32_t cpy_len;
-	uint16_t desc_idx, used_idx;
+	uint64_t hdr_addr, hdr_phys_addr;
+	struct rte_mbuf *hdr_mbuf;
 
 	if (unlikely(m == NULL))
-		return 0;
-
-	LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
-		dev->vid, cur_idx, end_idx);
+		return -1;
 
 	desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
 	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
-		return 0;
+		return -1;
 
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
+	hdr_mbuf = m;
+	hdr_addr = desc_addr;
+	hdr_phys_addr = buf_vec[vec_idx].buf_addr;
+	rte_prefetch0((void *)(uintptr_t)hdr_addr);
 
-	virtio_hdr.num_buffers = end_idx - start_idx;
+	virtio_hdr.num_buffers = num_buffers;
 	LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
-		dev->vid, virtio_hdr.num_buffers);
-
-	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-	vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+		dev->vid, num_buffers);
 
 	desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
 	desc_offset = dev->vhost_hlen;
-	desc_chain_head = buf_vec[vec_idx].desc_idx;
-	desc_chain_len = desc_offset;
 
 	mbuf_avail  = rte_pktmbuf_data_len(m);
 	mbuf_offset = 0;
 	while (mbuf_avail != 0 || m->next != NULL) {
 		/* done with current desc buf, get the next one */
 		if (desc_avail == 0) {
-			desc_idx = buf_vec[vec_idx].desc_idx;
 			vec_idx++;
-
-			if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
-				/* Update used ring with desc information */
-				used_idx = cur_idx++ & (vq->size - 1);
-				vq->used->ring[used_idx].id = desc_chain_head;
-				vq->used->ring[used_idx].len = desc_chain_len;
-				vhost_log_used_vring(dev, vq,
-					offsetof(struct vring_used,
-						 ring[used_idx]),
-					sizeof(vq->used->ring[used_idx]));
-				desc_chain_head = buf_vec[vec_idx].desc_idx;
-				desc_chain_len = 0;
-			}
-
 			desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
 			if (unlikely(!desc_addr))
-				return 0;
+				return -1;
 
 			/* Prefetch buffer address. */
 			rte_prefetch0((void *)(uintptr_t)desc_addr);
@@ -456,6 +511,16 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			mbuf_avail  = rte_pktmbuf_data_len(m);
 		}
 
+		if (hdr_addr) {
+			virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
+			copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+			vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
+			PRINT_PACKET(dev, (uintptr_t)hdr_addr,
+				     dev->vhost_hlen, 0);
+
+			hdr_addr = 0;
+		}
+
 		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
 		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
 			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
@@ -469,17 +534,9 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		mbuf_offset += cpy_len;
 		desc_avail  -= cpy_len;
 		desc_offset += cpy_len;
-		desc_chain_len += cpy_len;
 	}
 
-	used_idx = cur_idx & (vq->size - 1);
-	vq->used->ring[used_idx].id = desc_chain_head;
-	vq->used->ring[used_idx].len = desc_chain_len;
-	vhost_log_used_vring(dev, vq,
-		offsetof(struct vring_used, ring[used_idx]),
-		sizeof(vq->used->ring[used_idx]));
-
-	return end_idx - start_idx;
+	return 0;
 }
 
 static inline uint32_t __attribute__((always_inline))
@@ -487,9 +544,10 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
-	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t end;
+	uint32_t pkt_idx = 0;
+	uint16_t num_buffers;
 	struct buf_vector buf_vec[BUF_VECTOR_MAX];
+	uint16_t avail_head;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -506,28 +564,39 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (count == 0)
 		return 0;
 
+	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+
+	vq->shadow_used_idx = 0;
+	avail_head = *((volatile uint16_t *)&vq->avail->idx);
 	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
 
-		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &end, buf_vec) < 0)) {
+		if (unlikely(reserve_avail_buf_mergeable(dev, vq,
+						pkt_len, buf_vec, &num_buffers,
+						avail_head) < 0)) {
 			LOG_DEBUG(VHOST_DATA,
 				"(%d) failed to get enough desc from vring\n",
 				dev->vid);
+			vq->shadow_used_idx -= num_buffers;
 			break;
 		}
 
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
-						      pkts[pkt_idx], buf_vec);
-		rte_smp_wmb();
+		LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+			dev->vid, vq->last_avail_idx,
+			vq->last_avail_idx + num_buffers);
 
-		*(volatile uint16_t *)&vq->used->idx += nr_used;
-		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
-			sizeof(vq->used->idx));
-		vq->last_used_idx += nr_used;
+		if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx],
+						buf_vec, num_buffers) < 0) {
+			vq->shadow_used_idx -= num_buffers;
+			break;
+		}
+
+		vq->last_avail_idx += num_buffers;
 	}
 
-	if (likely(pkt_idx)) {
+	if (likely(vq->shadow_used_idx)) {
+		flush_shadow_used_ring(dev, vq);
+
 		/* flush used->idx update before we read avail->flags. */
 		rte_mb();
 
@@ -555,6 +624,18 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
 		return virtio_dev_rx(dev, queue_id, pkts, count);
 }
 
+/* Tell whether any host offload feature was negotiated in dev->features. */
+static inline bool
+virtio_net_with_host_offload(struct virtio_net *dev)
+{
+	/* VIRTIO_NET_F_* are bit numbers, not masks: shift before testing. */
+	return (dev->features & ((1ULL << VIRTIO_NET_F_CSUM) |
+			(1ULL << VIRTIO_NET_F_HOST_ECN) |
+			(1ULL << VIRTIO_NET_F_HOST_TSO4) |
+			(1ULL << VIRTIO_NET_F_HOST_TSO6) |
+			(1ULL << VIRTIO_NET_F_HOST_UFO))) != 0;
+}
+
 static void
 parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr)
 {
@@ -607,6 +688,9 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
 	void *l4_hdr = NULL;
 	struct tcp_hdr *tcp_hdr = NULL;
 
+	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
+		return;
+
 	parse_ethernet(m, &l4_proto, &l4_hdr);
 	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
@@ -685,9 +769,15 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
 	return 0;
 }
 
+static inline void __attribute__((always_inline))
+put_zmbuf(struct zcopy_mbuf *zmbuf)
+{
+	zmbuf->in_use = 0;
+}
+
 static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+		  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
 		  struct rte_mempool *mbuf_pool)
 {
 	struct vring_desc *desc;
@@ -696,20 +786,23 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	uint32_t mbuf_avail, mbuf_offset;
 	uint32_t cpy_len;
 	struct rte_mbuf *cur = m, *prev = m;
-	struct virtio_net_hdr *hdr;
+	struct virtio_net_hdr *hdr = NULL;
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 
-	desc = &vq->desc[desc_idx];
-	if (unlikely(desc->len < dev->vhost_hlen))
+	desc = &descs[desc_idx];
+	if (unlikely((desc->len < dev->vhost_hlen)) ||
+			(desc->flags & VRING_DESC_F_INDIRECT))
 		return -1;
 
 	desc_addr = gpa_to_vva(dev, desc->addr);
 	if (unlikely(!desc_addr))
 		return -1;
 
-	hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
-	rte_prefetch0(hdr);
+	if (virtio_net_with_host_offload(dev)) {
+		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+		rte_prefetch0(hdr);
+	}
 
 	/*
 	 * A virtio driver normally uses at least 2 desc buffers
@@ -718,31 +811,56 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 */
 	if (likely((desc->len == dev->vhost_hlen) &&
 		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &vq->desc[desc->next];
+		desc = &descs[desc->next];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
 
 		desc_addr = gpa_to_vva(dev, desc->addr);
 		if (unlikely(!desc_addr))
 			return -1;
 
-		rte_prefetch0((void *)(uintptr_t)desc_addr);
-
 		desc_offset = 0;
 		desc_avail  = desc->len;
 		nr_desc    += 1;
-
-		PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
 	} else {
 		desc_avail  = desc->len - dev->vhost_hlen;
 		desc_offset = dev->vhost_hlen;
 	}
 
+	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
 	mbuf_offset = 0;
 	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
 	while (1) {
+		uint64_t hpa;
+
 		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
-			(void *)((uintptr_t)(desc_addr + desc_offset)),
-			cpy_len);
+
+		/*
+		 * A desc buf might span two host physical pages that are
+		 * not contiguous. In such a case (gpa_to_hpa returns 0), data
+		 * will be copied even though zero copy is enabled.
+		 */
+		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+					desc->addr + desc_offset, cpy_len)))) {
+			cur->data_len = cpy_len;
+			cur->data_off = 0;
+			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_physaddr = hpa;
+
+			/*
+			 * In zero copy mode, one mbuf can only reference data
+			 * from a single desc buf, or from part of one.
+			 */
+			mbuf_avail = cpy_len;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+							   mbuf_offset),
+				(void *)((uintptr_t)(desc_addr + desc_offset)),
+				cpy_len);
+		}
 
 		mbuf_avail  -= cpy_len;
 		mbuf_offset += cpy_len;
@@ -754,10 +872,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
 				break;
 
-			if (unlikely(desc->next >= vq->size ||
-				     ++nr_desc > vq->size))
+			if (unlikely(desc->next >= max_desc ||
+				     ++nr_desc > max_desc))
+				return -1;
+			desc = &descs[desc->next];
+			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
 				return -1;
-			desc = &vq->desc[desc->next];
 
 			desc_addr = gpa_to_vva(dev, desc->addr);
 			if (unlikely(!desc_addr))
@@ -797,12 +917,86 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	prev->data_len = mbuf_offset;
 	m->pkt_len    += mbuf_offset;
 
-	if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE)
+	if (hdr)
 		vhost_dequeue_offload(hdr, m);
 
 	return 0;
 }
 
+static inline void __attribute__((always_inline))
+update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		 uint32_t used_idx, uint32_t desc_idx)
+{
+	vq->used->ring[used_idx].id  = desc_idx;
+	vq->used->ring[used_idx].len = 0;
+	vhost_log_used_vring(dev, vq,
+			offsetof(struct vring_used, ring[used_idx]),
+			sizeof(vq->used->ring[used_idx]));
+}
+
+static inline void __attribute__((always_inline))
+update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		uint32_t count)
+{
+	if (unlikely(count == 0))
+		return;
+
+	rte_smp_wmb();
+	rte_smp_rmb();
+
+	vq->used->idx += count;
+	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			sizeof(vq->used->idx));
+
+	/* Kick guest if required. */
+	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
+			&& (vq->callfd >= 0))
+		eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
+static inline struct zcopy_mbuf *__attribute__((always_inline))
+get_zmbuf(struct vhost_virtqueue *vq)
+{
+	uint16_t i;
+	uint16_t last;
+	int tries = 0;
+
+	/* search [last_zmbuf_idx, zmbuf_size) */
+	i = vq->last_zmbuf_idx;
+	last = vq->zmbuf_size;
+
+again:
+	for (; i < last; i++) {
+		if (vq->zmbufs[i].in_use == 0) {
+			vq->last_zmbuf_idx = i + 1;
+			vq->zmbufs[i].in_use = 1;
+			return &vq->zmbufs[i];
+		}
+	}
+
+	tries++;
+	if (tries == 1) {
+		/* search [0, last_zmbuf_idx) */
+		i = 0;
+		last = vq->last_zmbuf_idx;
+		goto again;
+	}
+
+	return NULL;
+}
+
+static inline bool __attribute__((always_inline))
+mbuf_is_consumed(struct rte_mbuf *m)
+{
+	while (m) {
+		if (rte_mbuf_refcnt_read(m) > 1)
+			return false;
+		m = m->next;
+	}
+
+	return true;
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -830,6 +1024,30 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
+	if (unlikely(dev->dequeue_zero_copy)) {
+		struct zcopy_mbuf *zmbuf, *next;
+		int nr_updated = 0;
+
+		for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+		     zmbuf != NULL; zmbuf = next) {
+			next = TAILQ_NEXT(zmbuf, next);
+
+			if (mbuf_is_consumed(zmbuf->mbuf)) {
+				used_idx = vq->last_used_idx++ & (vq->size - 1);
+				update_used_ring(dev, vq, used_idx,
+						 zmbuf->desc_idx);
+				nr_updated += 1;
+
+				TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+				rte_pktmbuf_free(zmbuf->mbuf);
+				put_zmbuf(zmbuf);
+				vq->nr_zmbuf -= 1;
+			}
+		}
+
+		update_used_idx(dev, vq, nr_updated);
+	}
+
 	/*
 	 * Construct a RARP broadcast packet, and inject it to the "pkts"
 	 * array, to looks like that guest actually send such packet.
@@ -853,16 +1071,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		}
 	}
 
-	avail_idx =  *((volatile uint16_t *)&vq->avail->idx);
-	free_entries = avail_idx - vq->last_used_idx;
+	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+			vq->last_avail_idx;
 	if (free_entries == 0)
 		goto out;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
-	/* Prefetch available ring to retrieve head indexes. */
-	used_idx = vq->last_used_idx & (vq->size - 1);
-	rte_prefetch0(&vq->avail->ring[used_idx]);
+	/* Prefetch available and used ring */
+	avail_idx = vq->last_avail_idx & (vq->size - 1);
+	used_idx  = vq->last_used_idx  & (vq->size - 1);
+	rte_prefetch0(&vq->avail->ring[avail_idx]);
 	rte_prefetch0(&vq->used->ring[used_idx]);
 
 	count = RTE_MIN(count, MAX_PKT_BURST);
@@ -872,49 +1091,81 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
 	/* Retrieve all of the head indexes first to avoid caching issues. */
 	for (i = 0; i < count; i++) {
-		used_idx = (vq->last_used_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[used_idx];
+		avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
+		used_idx  = (vq->last_used_idx  + i) & (vq->size - 1);
+		desc_indexes[i] = vq->avail->ring[avail_idx];
 
-		vq->used->ring[used_idx].id  = desc_indexes[i];
-		vq->used->ring[used_idx].len = 0;
-		vhost_log_used_vring(dev, vq,
-				offsetof(struct vring_used, ring[used_idx]),
-				sizeof(vq->used->ring[used_idx]));
+		if (likely(dev->dequeue_zero_copy == 0))
+			update_used_ring(dev, vq, used_idx, desc_indexes[i]);
 	}
 
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
+		struct vring_desc *desc;
+		uint16_t sz, idx;
 		int err;
 
 		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
 
+		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+			desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+					vq->desc[desc_indexes[i]].addr);
+			if (unlikely(!desc))
+				break;
+
+			rte_prefetch0(desc);
+			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+			idx = 0;
+		} else {
+			desc = vq->desc;
+			sz = vq->size;
+			idx = desc_indexes[i];
+		}
+
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
 			break;
 		}
-		err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-					mbuf_pool);
+
+		err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			break;
 		}
-	}
 
-	rte_smp_wmb();
-	rte_smp_rmb();
-	vq->used->idx += i;
-	vq->last_used_idx += i;
-	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
-			sizeof(vq->used->idx));
+		if (unlikely(dev->dequeue_zero_copy)) {
+			struct zcopy_mbuf *zmbuf;
 
-	/* Kick guest if required. */
-	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-			&& (vq->callfd >= 0))
-		eventfd_write(vq->callfd, (eventfd_t)1);
+			zmbuf = get_zmbuf(vq);
+			if (!zmbuf) {
+				rte_pktmbuf_free(pkts[i]);
+				break;
+			}
+			zmbuf->mbuf = pkts[i];
+			zmbuf->desc_idx = desc_indexes[i];
+
+			/*
+			 * Pin lock the mbuf; we will check later to see
+			 * whether the mbuf is freed (when we are the last
+			 * user) or not. If that's the case, we then could
+			 * update the used ring safely.
+			 */
+			rte_mbuf_refcnt_update(pkts[i], 1);
+
+			vq->nr_zmbuf += 1;
+			TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+		}
+	}
+	vq->last_avail_idx += i;
+
+	if (likely(dev->dequeue_zero_copy == 0)) {
+		vq->last_used_idx += i;
+		update_used_idx(dev, vq, i);
+	}
 
 out:
 	if (unlikely(rarp_mbuf != NULL)) {
author	Christian Ehrhardt <christian.ehrhardt@canonical.com>	2016-12-08 14:07:29 +0100
committer	Christian Ehrhardt <christian.ehrhardt@canonical.com>	2016-12-08 14:10:05 +0100
commit	6b3e017e5d25f15da73f7700f7f2ac553ef1a2e9 (patch)
tree	1b1fb3f903b2282e261ade69e3c17952b3fd3464 /lib
parent	32e04ea00cd159613e04acef75e52bfca6eeff2f (diff)